sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        int_value = value is not None and is_int(value.name)

        if isinstance(value, exp.Literal):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return exp.cast(value, kind)

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if int_value:
                    return exp.UnixToTime(this=value, scale=seq_get(args, 1))
                if not is_float(value.this):
                    return build_formatted_time(exp.StrToTime, "snowflake")(args)

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=_map_date_part(seq_get(args, 0)),
        )

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))
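
# Illustrative sketch (not part of the module): the builders above rewrite
# Snowflake-only functions into portable expressions at parse time, so other
# dialects can render them. Output shown is approximate and version-dependent:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="duckdb")
#     # roughly: SELECT CASE WHEN b = 0 THEN 0 ELSE a / b END
#
# DIV0 parses into an exp.If, which dialects lacking an IFF function render as
# a CASE expression.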
def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


DATE_PART_MAPPING = {
    "Y": "YEAR",
    "YY": "YEAR",
    "YYY": "YEAR",
    "YYYY": "YEAR",
    "YR": "YEAR",
    "YEARS": "YEAR",
    "YRS": "YEAR",
    "MM": "MONTH",
    "MON": "MONTH",
    "MONS": "MONTH",
    "MONTHS": "MONTH",
    "D": "DAY",
    "DD": "DAY",
    "DAYS": "DAY",
    "DAYOFMONTH": "DAY",
    "WEEKDAY": "DAYOFWEEK",
    "DOW": "DAYOFWEEK",
    "DW": "DAYOFWEEK",
    "WEEKDAY_ISO": "DAYOFWEEKISO",
    "DOW_ISO": "DAYOFWEEKISO",
    "DW_ISO": "DAYOFWEEKISO",
    "YEARDAY": "DAYOFYEAR",
    "DOY": "DAYOFYEAR",
    "DY": "DAYOFYEAR",
    "W": "WEEK",
    "WK": "WEEK",
    "WEEKOFYEAR": "WEEK",
    "WOY": "WEEK",
    "WY": "WEEK",
    "WEEK_ISO": "WEEKISO",
    "WEEKOFYEARISO": "WEEKISO",
    "WEEKOFYEAR_ISO": "WEEKISO",
    "Q": "QUARTER",
    "QTR": "QUARTER",
    "QTRS": "QUARTER",
    "QUARTERS": "QUARTER",
    "H": "HOUR",
    "HH": "HOUR",
    "HR": "HOUR",
    "HOURS": "HOUR",
    "HRS": "HOUR",
    "M": "MINUTE",
    "MI": "MINUTE",
    "MIN": "MINUTE",
    "MINUTES": "MINUTE",
    "MINS": "MINUTE",
    "S": "SECOND",
    "SEC": "SECOND",
    "SECONDS": "SECOND",
    "SECS": "SECOND",
    "MS": "MILLISECOND",
    "MSEC": "MILLISECOND",
    "MILLISECONDS": "MILLISECOND",
    "US": "MICROSECOND",
    "USEC": "MICROSECOND",
    "MICROSECONDS": "MICROSECOND",
    "NS": "NANOSECOND",
    "NSEC": "NANOSECOND",
    "NANOSEC": "NANOSECOND",
    "NSECOND": "NANOSECOND",
    "NSECONDS": "NANOSECOND",
    "NANOSECS": "NANOSECOND",
    "EPOCH": "EPOCH_SECOND",
    "EPOCH_SECONDS": "EPOCH_SECOND",
    "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
    "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
    "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
    "TZH": "TIMEZONE_HOUR",
    "TZM": "TIMEZONE_MINUTE",
}


@t.overload
def _map_date_part(part: exp.Expression) -> exp.Var:
    pass


@t.overload
def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    pass


def _map_date_part(part):
    mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None
    return exp.var(mapped) if mapped else part


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", _map_date_part(trunc.args["unit"]))
    return trunc
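
# Illustrative sketch (not part of the module): _map_date_part canonicalizes
# Snowflake's many date-part aliases before they reach the DATEADD/DATEDIFF/
# DATE_TRUNC handlers, following the table above:
#
#     >>> from sqlglot import exp
#     >>> _map_date_part(exp.var("yrs")).name
#     # 'YEAR'
#     >>> _map_date_part(exp.var("quarter")).name  # unmapped parts pass through
#     # 'quarter'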
def _build_timestamp_from_parts(args: t.List) -> exp.Func:
    if len(args) == 2:
        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
        # so we parse this into Anonymous for now instead of introducing complexity
        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)

    return exp.TimestampFromParts.from_arg_list(args)


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression
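
# Illustrative sketch (not part of the module): for non-Iceberg tables, nested
# type parameters are dropped at CREATE time, since plain Snowflake tables use
# untyped OBJECT/ARRAY columns. Output shown is approximate:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("CREATE TABLE t (c STRUCT(a INT))", read="duckdb", write="snowflake")
#     # roughly: CREATE TABLE t (c OBJECT)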
class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _build_convert_timezone,
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }
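
        # Illustrative sketch (not part of the module): entries above normalize
        # Snowflake spellings into canonical expressions at parse time, e.g.
        # SQUARE becomes a generic power expression. Output shown is approximate:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one("SELECT SQUARE(x)", read="snowflake").sql()
        #     # roughly: SELECT POWER(x, 2)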
_build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 408 "TO_VARCHAR": exp.ToChar.from_arg_list, 409 "ZEROIFNULL": _build_if_from_zeroifnull, 410 } 411 412 FUNCTION_PARSERS = { 413 **parser.Parser.FUNCTION_PARSERS, 414 "DATE_PART": lambda self: self._parse_date_part(), 415 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 416 } 417 FUNCTION_PARSERS.pop("TRIM") 418 419 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 420 421 RANGE_PARSERS = { 422 **parser.Parser.RANGE_PARSERS, 423 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 424 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 425 } 426 427 ALTER_PARSERS = { 428 **parser.Parser.ALTER_PARSERS, 429 "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")), 430 "UNSET": lambda self: self.expression( 431 exp.Set, 432 tag=self._match_text_seq("TAG"), 433 expressions=self._parse_csv(self._parse_id_var), 434 unset=True, 435 ), 436 "SWAP": lambda self: self._parse_alter_table_swap(), 437 } 438 439 STATEMENT_PARSERS = { 440 **parser.Parser.STATEMENT_PARSERS, 441 TokenType.SHOW: lambda self: self._parse_show(), 442 } 443 444 PROPERTY_PARSERS = { 445 **parser.Parser.PROPERTY_PARSERS, 446 "LOCATION": lambda self: self._parse_location_property(), 447 } 448 449 TYPE_CONVERTER = { 450 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 451 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 452 } 453 454 SHOW_PARSERS = { 455 "SCHEMAS": _show_parser("SCHEMAS"), 456 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 457 "OBJECTS": _show_parser("OBJECTS"), 458 "TERSE OBJECTS": _show_parser("OBJECTS"), 459 "TABLES": _show_parser("TABLES"), 460 "TERSE TABLES": _show_parser("TABLES"), 461 "VIEWS": _show_parser("VIEWS"), 462 "TERSE VIEWS": _show_parser("VIEWS"), 463 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 464 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 465 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 466 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 467 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 468 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 469 "SEQUENCES": _show_parser("SEQUENCES"), 470 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 471 "COLUMNS": _show_parser("COLUMNS"), 472 "USERS": _show_parser("USERS"), 473 "TERSE USERS": _show_parser("USERS"), 474 } 475 476 STAGED_FILE_SINGLE_TOKENS = { 477 TokenType.DOT, 478 TokenType.MOD, 479 TokenType.SLASH, 480 } 481 482 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 483 484 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 485 486 def _parse_create(self) -> exp.Create | exp.Command: 487 expression = super()._parse_create() 488 if isinstance(expression, exp.Create) and expression.kind == "TAG": 489 # Replace the Table node with the enclosed Identifier 490 expression.this.replace(expression.this.this) 491 492 return expression 493 494 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 495 this = super()._parse_column_ops(this) 496 497 casts = [] 498 json_path = [] 499 500 while self._match(TokenType.COLON): 501 path = super()._parse_column_ops(self._parse_field(any_token=True)) 502 503 # The cast :: operator has a lower precedence than the extraction operator :, so 504 # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH 505 while isinstance(path, exp.Cast): 506 casts.append(path.to) 507 path = path.this 508 509 if path: 510 
        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = _map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table
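
        # Illustrative sketch (not part of the module): AT/BEFORE time-travel
        # clauses are captured as HistoricalData on the table, so they round-trip
        # through parse and generate. Output shown is approximate:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one(
        #     ...     "SELECT * FROM t AT(TIMESTAMP => '2024-01-01'::TIMESTAMP)",
        #     ...     read="snowflake",
        #     ... ).sql("snowflake")
        #     # roughly: SELECT * FROM t AT (TIMESTAMP => CAST('2024-01-01' AS TIMESTAMP))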
        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS,
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator; we also stop at a closing
            # paren in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))
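
        # Illustrative sketch (not part of the module): staged file references
        # parse into a Table whose name is the raw stage path, optionally carrying
        # FILE_FORMAT/PATTERN options. Output shown is approximate:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one(
        #     ...     "SELECT $1 FROM @my_stage/data (PATTERN => '.*[.]csv')",
        #     ...     read="snowflake",
        #     ... ).sql("snowflake")
        #     # roughly: SELECT $1 FROM @my_stage/data (PATTERN => '.*[.]csv')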
    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
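
        # Illustrative sketch (not part of the module): these flags steer the
        # generic generator, e.g. STAR_EXCEPT = "EXCLUDE" renders star exceptions
        # with Snowflake's keyword. Output shown is approximate:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="snowflake")
        #     # roughly: SELECT * EXCLUDE (a) FROM t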
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )
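
        # Illustrative sketch (not part of the module): the TRANSFORMS above drive
        # transpilation into Snowflake, e.g. a generic IF becomes IFF. Output shown
        # is approximate:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT IF(x > 0, 1, 2)", read="spark", write="snowflake")
        #     # roughly: SELECT IFF(x > 0, 1, 2)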
expression.args.get("precision"), 915 expression.args.get("scale"), 916 ) 917 918 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 919 milli = expression.args.get("milli") 920 if milli is not None: 921 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 922 expression.set("nano", milli_to_nano) 923 924 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 925 926 def trycast_sql(self, expression: exp.TryCast) -> str: 927 value = expression.this 928 929 if value.type is None: 930 from sqlglot.optimizer.annotate_types import annotate_types 931 932 value = annotate_types(value) 933 934 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 935 return super().trycast_sql(expression) 936 937 # TRY_CAST only works for string values in Snowflake 938 return self.cast_sql(expression) 939 940 def log_sql(self, expression: exp.Log) -> str: 941 if not expression.expression: 942 return self.func("LN", expression.this) 943 944 return super().log_sql(expression) 945 946 def unnest_sql(self, expression: exp.Unnest) -> str: 947 unnest_alias = expression.args.get("alias") 948 offset = expression.args.get("offset") 949 950 columns = [ 951 exp.to_identifier("seq"), 952 exp.to_identifier("key"), 953 exp.to_identifier("path"), 954 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 955 seq_get(unnest_alias.columns if unnest_alias else [], 0) 956 or exp.to_identifier("value"), 957 exp.to_identifier("this"), 958 ] 959 960 if unnest_alias: 961 unnest_alias.set("columns", columns) 962 else: 963 unnest_alias = exp.TableAlias(this="_u", columns=columns) 964 965 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 966 alias = self.sql(unnest_alias) 967 alias = f" AS {alias}" if alias else "" 968 return f"{explode}{alias}" 969 970 def show_sql(self, expression: exp.Show) -> str: 971 terse = "TERSE " if expression.args.get("terse") else "" 972 history = " HISTORY" if expression.args.get("history") else "" 973 like = self.sql(expression, "like") 974 like = f" LIKE {like}" if like else "" 975 976 scope = self.sql(expression, "scope") 977 scope = f" {scope}" if scope else "" 978 979 scope_kind = self.sql(expression, "scope_kind") 980 if scope_kind: 981 scope_kind = f" IN {scope_kind}" 982 983 starts_with = self.sql(expression, "starts_with") 984 if starts_with: 985 starts_with = f" STARTS WITH {starts_with}" 986 987 limit = self.sql(expression, "limit") 988 989 from_ = self.sql(expression, "from") 990 if from_: 991 from_ = f" FROM {from_}" 992 993 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 994 995 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 996 # Other dialects don't support all of the following parameters, so we need to 997 # generate default values as necessary to ensure the transpilation is correct 998 group = expression.args.get("group") 999 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 1000 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 1001 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 1002 1003 return self.func( 1004 "REGEXP_SUBSTR", 1005 expression.this, 1006 expression.expression, 1007 position, 1008 occurrence, 1009 parameters, 1010 group, 1011 ) 1012 1013 def except_op(self, expression: exp.Except) -> str: 1014 if not expression.args.get("distinct"): 1015 self.unsupported("EXCEPT with All is not 
        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT with ALL is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            if expression.args.get("weight") or expression.args.get("accuracy"):
                self.unsupported(
                    "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake"
                )

            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
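
        # Illustrative sketch (not part of the module): struct_sql renders structs
        # as OBJECT_CONSTRUCT key/value pairs. Output shown is approximate:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT STRUCT(1 AS a)", read="bigquery", write="snowflake")
        #     # roughly: SELECT OBJECT_CONSTRUCT('a', 1)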
NORMALIZATION_STRATEGY

Specifies the strategy according to which identifiers should be normalized.

NULL_ORDERING

Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
PREFER_CTE_ALIAS_COLUMN

Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.

For example,

WITH y(c) AS (
    SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
    SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
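A minimal sketch of this rewrite, assuming (as the flag's description suggests) that the optimizer's qualify pass is what consults PREFER_CTE_ALIAS_COLUMN:

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"

# Under Snowflake semantics, qualification pushes the CTE alias column c
# onto the SUM(a) projection, as in the rewritten form above.
print(qualify(sqlglot.parse_one(sql, read="snowflake"), dialect="snowflake").sql("snowflake"))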
TIME_MAPPING

Associates this dialect's time formats with their equivalent Python strftime formats.
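This mapping is what lets a Snowflake format string be translated into another dialect's format language during transpilation. A minimal sketch (the DuckDB output shown is indicative):

import sqlglot

# 'yyyy-mm-dd' is mapped through Python strftime tokens ('%Y-%m-%d') and
# re-emitted using the target dialect's own format conventions.
sql = "SELECT TO_TIMESTAMP('2020-01-01', 'yyyy-mm-dd')"
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
# e.g. SELECT STRPTIME('2020-01-01', '%Y-%m-%d')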
def quote_identifier(self, expression: E, identify: bool = True) -> E:
    # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
    # unquoted DUAL keyword in a special way and does not map it to a user-defined table
    if (
        isinstance(expression, exp.Identifier)
        and isinstance(expression.parent, exp.Table)
        and expression.name.lower() == "dual"
    ):
        return expression  # type: ignore

    return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.

Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
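A minimal sketch of the DUAL carve-out above, calling the method directly on parsed nodes (the expected outputs in the comments are indicative):

from sqlglot import exp, parse_one
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()

# The identifier inside FROM dual is returned untouched by the override...
table = parse_one("SELECT 1 FROM dual", read="snowflake").find(exp.Table)
assert table is not None
print(dialect.quote_identifier(table.this).sql("snowflake"))  # dual

# ...while an ordinary identifier is quoted, since identify defaults to True.
print(dialect.quote_identifier(exp.to_identifier("col")).sql("snowflake"))  # "col"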
Mapping of an escaped sequence (\\n) to its unescaped version (a literal newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Parser(parser.Parser):
    IDENTIFY_PIVOT_STRINGS = True
    DEFAULT_SAMPLING_METHOD = "BERNOULLI"

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.MATCH_CONDITION,
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
    TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
        "ARRAYAGG": exp.ArrayAgg.from_arg_list,
        "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
        "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
            this=seq_get(args, 1), expression=seq_get(args, 0)
        ),
        "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
            # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
            start=seq_get(args, 0),
            end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
            step=seq_get(args, 2),
        ),
        "BITXOR": binary_from_function(exp.BitwiseXor),
        "BIT_XOR": binary_from_function(exp.BitwiseXor),
        "BOOLXOR": binary_from_function(exp.Xor),
        "CONVERT_TIMEZONE": _build_convert_timezone,
        "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
        "DATE_TRUNC": _date_trunc_to_time,
        "DATEADD": _build_date_time_add(exp.DateAdd),
        "DATEDIFF": _build_datediff,
        "DIV0": _build_if_from_div0,
        "FLATTEN": exp.Explode.from_arg_list,
        "GET_PATH": lambda args, dialect: exp.JSONExtract(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        ),
        "IFF": exp.If.from_arg_list,
        "LAST_DAY": lambda args: exp.LastDay(
            this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
        ),
        "LISTAGG": exp.GroupConcat.from_arg_list,
        "MEDIAN": lambda args: exp.PercentileCont(
            this=seq_get(args, 0), expression=exp.Literal.number(0.5)
        ),
        "NULLIFZERO": _build_if_from_nullifzero,
        "OBJECT_CONSTRUCT": _build_object_construct,
        "REGEXP_REPLACE": _build_regexp_replace,
        "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
        "RLIKE": exp.RegexpLike.from_arg_list,
        "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
        "TIMEADD": _build_date_time_add(exp.TimeAdd),
        "TIMEDIFF": _build_datediff,
        "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
        "TIMESTAMPDIFF": _build_datediff,
        "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
        "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
        "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
        "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
        "TO_NUMBER": lambda args: exp.ToNumber(
            this=seq_get(args, 0),
            format=seq_get(args, 1),
            precision=seq_get(args, 2),
            scale=seq_get(args, 3),
        ),
        "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
        "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
        "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
        "TO_VARCHAR": exp.ToChar.from_arg_list,
        "ZEROIFNULL": _build_if_from_zeroifnull,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "DATE_PART": lambda self: self._parse_date_part(),
        "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
    }
    FUNCTION_PARSERS.pop("TRIM")

    TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
        TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")),
        "UNSET": lambda self: self.expression(
            exp.Set,
            tag=self._match_text_seq("TAG"),
            expressions=self._parse_csv(self._parse_id_var),
            unset=True,
        ),
        "SWAP": lambda self: self._parse_alter_table_swap(),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.SHOW: lambda self: self._parse_show(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "LOCATION": lambda self: self._parse_location_property(),
    }

    TYPE_CONVERTER = {
        # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
    }

    SHOW_PARSERS = {
        "SCHEMAS": _show_parser("SCHEMAS"),
        "TERSE SCHEMAS": _show_parser("SCHEMAS"),
        "OBJECTS": _show_parser("OBJECTS"),
        "TERSE OBJECTS": _show_parser("OBJECTS"),
        "TABLES": _show_parser("TABLES"),
        "TERSE TABLES": _show_parser("TABLES"),
        "VIEWS": _show_parser("VIEWS"),
        "TERSE VIEWS": _show_parser("VIEWS"),
        "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "SEQUENCES": _show_parser("SEQUENCES"),
        "TERSE SEQUENCES": _show_parser("SEQUENCES"),
        "COLUMNS": _show_parser("COLUMNS"),
        "USERS": _show_parser("USERS"),
        "TERSE USERS": _show_parser("USERS"),
    }

    STAGED_FILE_SINGLE_TOKENS = {
        TokenType.DOT,
        TokenType.MOD,
        TokenType.SLASH,
    }

    FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

    SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

    def _parse_create(self) -> exp.Create | exp.Command:
        expression = super()._parse_create()
        if isinstance(expression, exp.Create) and expression.kind == "TAG":
            # Replace the Table node with the enclosed Identifier
            expression.this.replace(expression.this.this)

        return expression

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = super()._parse_column_ops(this)

        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            path = super()._parse_column_ops(self._parse_field(any_token=True))

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if path:
                json_path.append(path.sql(dialect="snowflake", copy=False))

        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
            )

        while casts:
            this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
    # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
    def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
        this = self._parse_var() or self._parse_type()

        if not this:
            return None

        self._match(TokenType.COMMA)
        expression = self._parse_bitwise()
        this = _map_date_part(this)
        name = this.name.upper()

        if name.startswith("EPOCH"):
            if name == "EPOCH_MILLISECOND":
                scale = 10**3
            elif name == "EPOCH_MICROSECOND":
                scale = 10**6
            elif name == "EPOCH_NANOSECOND":
                scale = 10**9
            else:
                scale = None

            ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
            to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

            if scale:
                to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

            return to_unix

        return self.expression(exp.Extract, this=this, expression=expression)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        if is_map:
            # Keys are strings in Snowflake's objects, see also:
            # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
            # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
            return self._parse_slice(self._parse_string())

        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        lateral = super()._parse_lateral()
        if not lateral:
            return lateral

        if isinstance(lateral.this, exp.Explode):
            table_alias = lateral.args.get("alias")
            columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
            if table_alias and not table_alias.args.get("columns"):
                table_alias.set("columns", columns)
            elif not table_alias:
                exp.alias_(lateral, "_flattened", table=columns, copy=False)

        return lateral

    def _parse_at_before(self, table: exp.Table) -> exp.Table:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        if self._match_texts(("AT", "BEFORE")):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                when = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
                table.set("when", when)
            else:
                self._retreat(index)

        return table

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        # https://docs.snowflake.com/en/user-guide/querying-stage
        if self._match(TokenType.STRING, advance=False):
            table = self._parse_string()
        elif self._match_text_seq("@", advance=False):
            table = self._parse_location_path()
        else:
            table = None

        if table:
            file_format = None
            pattern = None

            wrapped = self._match(TokenType.L_PAREN)
            while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILE_FORMAT", "=>"):
                    file_format = self._parse_string() or super()._parse_table_parts(
                        is_db_reference=is_db_reference
                    )
                elif self._match_text_seq("PATTERN", "=>"):
                    pattern = self._parse_string()
                else:
                    break

                self._match(TokenType.COMMA)

            table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
        else:
            table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

        return self._parse_at_before(table)

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IDENTIFIER", "("):
            identifier = (
                super()._parse_id_var(any_token=any_token, tokens=tokens)
                or self._parse_string()
            )
            self._match_r_paren()
            return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

        return super()._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_show_snowflake(self, this: str) -> exp.Show:
        scope = None
        scope_kind = None

        # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
        # which is syntactically valid but has no effect on the output
        terse = self._tokens[self._index - 2].text.upper() == "TERSE"

        history = self._match_text_seq("HISTORY")

        like = self._parse_string() if self._match(TokenType.LIKE) else None

        if self._match(TokenType.IN):
            if self._match_text_seq("ACCOUNT"):
                scope_kind = "ACCOUNT"
            elif self._match_set(self.DB_CREATABLES):
                scope_kind = self._prev.text.upper()
                if self._curr:
                    scope = self._parse_table_parts()
            elif self._curr:
                scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                scope = self._parse_table_parts()

        return self.expression(
            exp.Show,
            **{
                "terse": terse,
                "this": this,
                "history": history,
                "like": like,
                "scope": scope,
                "scope_kind": scope_kind,
                "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                "limit": self._parse_limit(),
                "from": self._parse_string() if self._match(TokenType.FROM) else None,
            },
        )

    def _parse_alter_table_swap(self) -> exp.SwapTable:
        self._match_text_seq("WITH")
        return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

    def _parse_location_property(self) -> exp.LocationProperty:
        self._match(TokenType.EQ)
        return self.expression(exp.LocationProperty, this=self._parse_location_path())

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Parse either a subquery or a staged file
        return (
            self._parse_select(table=True)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table_parts()
        )

    def _parse_location_path(self) -> exp.Var:
        parts = [self._advance_any(ignore_reserved=True)]

        # We avoid consuming a comma token because external tables like @foo and @bar
        # can be joined in a query with a comma separator, as well as closing paren
        # in case of subqueries
        while self._is_connected() and not self._match_set(
            (TokenType.COMMA, TokenType.R_PAREN), advance=False
        ):
            parts.append(self._advance_any(ignore_reserved=True))

        return exp.var("".join(part.text for part in parts if part))
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
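A minimal sketch of this parser on Snowflake-specific syntax, here the ':' path extraction combined with a '::' cast that _parse_column_ops rearranges (the output shown is indicative):

import sqlglot

# ':' builds a GET_PATH-style JSONExtract; the trailing '::INT' cast is
# hoisted above the extraction instead of being applied to the path itself.
ast = sqlglot.parse_one("SELECT v:a.b::INT FROM t", read="snowflake")
print(ast.sql("snowflake"))  # e.g. SELECT CAST(GET_PATH(v, 'a.b') AS INT) FROM t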
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Tokenizer(tokens.Tokenizer):
    STRING_ESCAPES = ["\\", "'"]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    RAW_STRINGS = ["$$"]
    COMMENTS = ["--", "//", ("/*", "*/")]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BYTEINT": TokenType.INT,
        "CHAR VARYING": TokenType.VARCHAR,
        "CHARACTER VARYING": TokenType.VARCHAR,
        "EXCLUDE": TokenType.EXCEPT,
        "ILIKE ANY": TokenType.ILIKE_ANY,
        "LIKE ANY": TokenType.LIKE_ANY,
        "MATCH_CONDITION": TokenType.MATCH_CONDITION,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "MINUS": TokenType.EXCEPT,
        "NCHAR VARYING": TokenType.VARCHAR,
        "PUT": TokenType.COMMAND,
        "REMOVE": TokenType.COMMAND,
        "RM": TokenType.COMMAND,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "SQL_DOUBLE": TokenType.DOUBLE,
        "SQL_VARCHAR": TokenType.VARCHAR,
        "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
        "TAG": TokenType.TAG,
        "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
        "TOP": TokenType.TOP,
    }

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    VAR_SINGLE_TOKENS = {"$"}

    COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
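A minimal sketch exercising these settings through the tokenize helper inherited from Dialect:

from sqlglot.dialects.snowflake import Snowflake

# $$...$$ is a raw string and '//' opens a line comment under this tokenizer.
tokens = Snowflake().tokenize("SELECT $$line1\nline2$$ // trailing comment")
print([(token.token_type.name, token.text) for token in tokens])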
class Generator(generator.Generator):
    PARAMETER_TOKEN = "$"
    MATCHED_BY_SOURCE = False
    SINGLE_STRING_INTERVAL = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    AGGREGATE_FILTER_SUPPORTED = False
    SUPPORTS_TABLE_COPY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    JSON_KEY_VALUE_PAIR_SEP = ","
    INSERT_OVERWRITE = " OVERWRITE INTO"
    STRUCT_DELIMITER = ("(", ")")
    COPY_PARAMS_ARE_WRAPPED = False
    COPY_PARAMS_EQ_REQUIRED = True
    STAR_EXCEPT = "EXCLUDE"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: inline_array_sql,
        exp.ArrayConcat: rename_func("ARRAY_CAT"),
        exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
        exp.AtTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), e.this
        ),
        exp.BitwiseXor: rename_func("BITXOR"),
        exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
        exp.DateAdd: date_delta_sql("DATEADD"),
        exp.DateDiff: date_delta_sql("DATEDIFF"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.Explode: rename_func("FLATTEN"),
        exp.Extract: rename_func("DATE_PART"),
        exp.FromTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
        ),
        exp.GenerateSeries: lambda self, e: self.func(
            "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
        ),
        exp.GroupConcat: rename_func("LISTAGG"),
        exp.If: if_sql(name="IFF", false_value="NULL"),
        exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
        exp.JSONExtractScalar: lambda self, e: self.func(
            "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
        ),
        exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
        exp.JSONPathRoot: lambda *_: "",
        exp.LogicalAnd: rename_func("BOOLAND_AGG"),
        exp.LogicalOr: rename_func("BOOLOR_AGG"),
        exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.PercentileCont: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.PercentileDisc: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
        exp.RegexpILike: _regexpilike_sql,
        exp.Rand: rename_func("RANDOM"),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
        exp.StartsWith: rename_func("STARTSWITH"),
        exp.StrPosition: lambda self, e: self.func(
            "POSITION", e.args.get("substr"), e.this, e.args.get("position")
        ),
        exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
        exp.Stuff: rename_func("INSERT"),
        exp.TimeAdd: date_delta_sql("TIMEADD"),
        exp.TimestampDiff: lambda self, e: self.func(
            "TIMESTAMPDIFF", e.unit, e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeToStr: lambda self, e: self.func(
            "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
        ),
        exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
        exp.ToArray: rename_func("TO_ARRAY"),
        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
        exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
        exp.TsOrDsToDate: lambda self, e: self.func(
            "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
        ),
        exp.UnixToTime: rename_func("TO_TIMESTAMP"),
        exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: rename_func("BOOLXOR"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.NESTED: "OBJECT",
        exp.DataType.Type.STRUCT: "OBJECT",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    UNSUPPORTED_VALUES_EXPRESSIONS = {
        exp.Map,
        exp.StarMap,
        exp.Struct,
        exp.VarMap,
    }

    def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
        if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
            values_as_table = False

        return super().values_sql(expression, values_as_table=values_as_table)

    def datatype_sql(self, expression: exp.DataType) -> str:
        expressions = expression.expressions
        if (
            expressions
            and expression.is_type(*exp.DataType.STRUCT_TYPES)
            and any(isinstance(field_type, exp.DataType) for field_type in expressions)
        ):
            # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
            return "OBJECT"

        return super().datatype_sql(expression)

    def tonumber_sql(self, expression: exp.ToNumber) -> str:
        return self.func(
            "TO_NUMBER",
            expression.this,
            expression.args.get("format"),
            expression.args.get("precision"),
            expression.args.get("scale"),
        )

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        milli = expression.args.get("milli")
        if milli is not None:
            milli_to_nano = milli.pop() * exp.Literal.number(1000000)
            expression.set("nano", milli_to_nano)

        return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        value = expression.this

        if value.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            value = annotate_types(value)

        if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
            return super().trycast_sql(expression)

        # TRY_CAST only works for string values in Snowflake
        return self.cast_sql(expression)

    def log_sql(self, expression: exp.Log) -> str:
        if not expression.expression:
            return self.func("LN", expression.this)

        return super().log_sql(expression)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        unnest_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            seq_get(unnest_alias.columns if unnest_alias else [], 0)
            or exp.to_identifier("value"),
            exp.to_identifier("this"),
        ]

        if unnest_alias:
            unnest_alias.set("columns", columns)
        else:
            unnest_alias = exp.TableAlias(this="_u", columns=columns)

        explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
        alias = self.sql(unnest_alias)
        alias = f" AS {alias}" if alias else ""
        return f"{explode}{alias}"

    def show_sql(self, expression: exp.Show) -> str:
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""
        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from")
        if from_:
            from_ = f" FROM {from_}"

        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        # Other dialects don't support all of the following parameters, so we need to
        # generate default values as necessary to ensure the transpilation is correct
        group = expression.args.get("group")
        parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
        occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
        position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

        return self.func(
            "REGEXP_SUBSTR",
            expression.this,
            expression.expression,
            position,
            occurrence,
            parameters,
            group,
        )

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT with All is not supported in Snowflake")
        return super().except_op(expression)

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT with All is not supported in Snowflake")
        return super().intersect_op(expression)

    def describe_sql(self, expression: exp.Describe) -> str:
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"

    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""
        return f"AUTOINCREMENT{start}{increment}"

    def swaptable_sql(self, expression: exp.SwapTable) -> str:
        this = self.sql(expression, "this")
        return f"SWAP WITH {this}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")

    def cluster_sql(self, expression: exp.Cluster) -> str:
        return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

    def struct_sql(self, expression: exp.Struct) -> str:
        keys = []
        values = []

        for i, e in enumerate(expression.expressions):
            if isinstance(e, exp.PropertyEQ):
                keys.append(
                    exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                )
                values.append(e.expression)
            else:
                keys.append(exp.Literal.string(f"_{i}"))
                values.append(e)

        return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

    def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
        if expression.args.get("weight") or expression.args.get("accuracy"):
            self.unsupported(
                "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake"
            )

        return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
Generator converts a given syntax tree to the corresponding SQL string.

Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
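A minimal end-to-end sketch of generation into Snowflake, driven by the TRANSFORMS and STAR_EXCEPT settings above (the outputs in the comments are indicative):

import sqlglot

# IF(...) is emitted as IFF(...) per the exp.If transform.
print(sqlglot.transpile("SELECT IF(x > 0, 1, NULL) FROM t", write="snowflake")[0])
# e.g. SELECT IFF(x > 0, 1, NULL) FROM t

# STAR_EXCEPT = "EXCLUDE" rewrites BigQuery's SELECT * EXCEPT (...).
print(sqlglot.transpile("SELECT * EXCEPT (secret) FROM t", read="bigquery", write="snowflake")[0])
# e.g. SELECT * EXCLUDE (secret) FROM t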
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- OUTER_UNION_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql