sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        int_value = value is not None and is_int(value.name)

        if isinstance(value, exp.Literal):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return exp.cast(value, kind)

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if int_value:
                    return exp.UnixToTime(this=value, scale=seq_get(args, 1))
                if not is_float(value.this):
                    return build_formatted_time(exp.StrToTime, "snowflake")(args)

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=_map_date_part(seq_get(args, 0)),
        )

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))
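
# A minimal sketch of what the DIV0/NULLIFZERO builders above produce; `a` and `b`
# are placeholder identifiers and the outputs are the expected Snowflake round-trips
# (exact rendering can vary slightly across sqlglot versions):
#
#     >>> from sqlglot import parse_one
#     >>> parse_one("SELECT DIV0(a, b)", read="snowflake").sql(dialect="snowflake")
#     'SELECT IFF(b = 0, 0, a / b)'
#     >>> parse_one("SELECT NULLIFZERO(a)", read="snowflake").sql(dialect="snowflake")
#     'SELECT IFF(a = 0, NULL, a)'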


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


DATE_PART_MAPPING = {
    "Y": "YEAR",
    "YY": "YEAR",
    "YYY": "YEAR",
    "YYYY": "YEAR",
    "YR": "YEAR",
    "YEARS": "YEAR",
    "YRS": "YEAR",
    "MM": "MONTH",
    "MON": "MONTH",
    "MONS": "MONTH",
    "MONTHS": "MONTH",
    "D": "DAY",
    "DD": "DAY",
    "DAYS": "DAY",
    "DAYOFMONTH": "DAY",
    "WEEKDAY": "DAYOFWEEK",
    "DOW": "DAYOFWEEK",
    "DW": "DAYOFWEEK",
    "WEEKDAY_ISO": "DAYOFWEEKISO",
    "DOW_ISO": "DAYOFWEEKISO",
    "DW_ISO": "DAYOFWEEKISO",
    "YEARDAY": "DAYOFYEAR",
    "DOY": "DAYOFYEAR",
    "DY": "DAYOFYEAR",
    "W": "WEEK",
    "WK": "WEEK",
    "WEEKOFYEAR": "WEEK",
    "WOY": "WEEK",
    "WY": "WEEK",
    "WEEK_ISO": "WEEKISO",
    "WEEKOFYEARISO": "WEEKISO",
    "WEEKOFYEAR_ISO": "WEEKISO",
    "Q": "QUARTER",
    "QTR": "QUARTER",
    "QTRS": "QUARTER",
    "QUARTERS": "QUARTER",
    "H": "HOUR",
    "HH": "HOUR",
    "HR": "HOUR",
    "HOURS": "HOUR",
    "HRS": "HOUR",
    "M": "MINUTE",
    "MI": "MINUTE",
    "MIN": "MINUTE",
    "MINUTES": "MINUTE",
    "MINS": "MINUTE",
    "S": "SECOND",
    "SEC": "SECOND",
    "SECONDS": "SECOND",
    "SECS": "SECOND",
    "MS": "MILLISECOND",
    "MSEC": "MILLISECOND",
    "MILLISECONDS": "MILLISECOND",
    "US": "MICROSECOND",
    "USEC": "MICROSECOND",
    "MICROSECONDS": "MICROSECOND",
    "NS": "NANOSECOND",
    "NSEC": "NANOSECOND",
    "NANOSEC": "NANOSECOND",
    "NSECOND": "NANOSECOND",
    "NSECONDS": "NANOSECOND",
    "NANOSECS": "NANOSECOND",
    "EPOCH": "EPOCH_SECOND",
    "EPOCH_SECONDS": "EPOCH_SECOND",
    "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
    "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
    "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
    "TZH": "TIMEZONE_HOUR",
    "TZM": "TIMEZONE_MINUTE",
}


@t.overload
def _map_date_part(part: exp.Expression) -> exp.Var:
    pass


@t.overload
def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    pass


def _map_date_part(part):
    mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None
    return exp.var(mapped) if mapped else part
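
# For illustration: the mapping above canonicalizes Snowflake's many date-part
# aliases before they reach the generic expressions (a minimal sketch):
#
#     >>> from sqlglot import exp
#     >>> _map_date_part(exp.var("mon")).name
#     'MONTH'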


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", _map_date_part(trunc.args["unit"]))
    return trunc


def _build_timestamp_from_parts(args: t.List) -> exp.Func:
    if len(args) == 2:
        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
        # so we parse this into Anonymous for now instead of introducing complexity
        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)

    return exp.TimestampFromParts.from_arg_list(args)


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }
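
    # A sketch of TIME_MAPPING in action: sqlglot's format_time helper uses this
    # table to translate Snowflake format strings into strftime-style tokens
    # (expected result shown; token coverage can differ across sqlglot versions):
    #
    #     >>> from sqlglot.time import format_time
    #     >>> format_time("yyyy-mm-DD hh24:mi:ss", Snowflake.TIME_MAPPING)
    #     '%Y-%m-%d %H:%M:%S'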

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_JSON_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _build_convert_timezone,
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
        }

        TYPE_CONVERTER = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_conjunction(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=self._parse_id_var(),
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint, this=self._parse_id_var()
                )
            if self._match(TokenType.TAG):
                return self.expression(
                    exp.TagColumnConstraint,
                    expressions=self._parse_wrapped_csv(self._parse_property),
                )

            return None

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = _map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral
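
        # Sketch: when a LATERAL FLATTEN(...) carries no explicit column list, the
        # canonical FLATTEN output columns above are attached to its alias, so e.g.
        # `... FROM LATERAL FLATTEN(input => x) AS f` behaves as if it were written
        # `... AS f(seq, key, path, index, value, this)`. This mirrors Snowflake's
        # documented FLATTEN output schema.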

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # this will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS,
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )
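
        # A minimal sketch of the AT | BEFORE time-travel clause handled above; the
        # timestamp literal is a placeholder and the statement is expected to
        # round-trip unchanged:
        #
        #     >>> from sqlglot import parse_one
        #     >>> parse_one(
        #     ...     "SELECT * FROM t AT (TIMESTAMP => '2024-07-01')", read="snowflake"
        #     ... ).sql(dialect="snowflake")
        #     "SELECT * FROM t AT (TIMESTAMP => '2024-07-01')"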

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as a closing paren
            # in the case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STREAMLIT": TokenType.STREAMLIT,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
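
    # Tokenizer sketch: `$$ ... $$` delimits raw strings and `//` opens a line
    # comment in Snowflake; both tokenize into ordinary sqlglot tokens, so a raw
    # string becomes a plain string literal (expected output shown):
    #
    #     >>> from sqlglot import parse_one
    #     >>> parse_one("SELECT $$hello$$", read="snowflake").sql(dialect="snowflake")
    #     "SELECT 'hello'"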

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }
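
        # TRANSFORMS sketch: a generic expression parsed from another dialect is
        # rendered with its Snowflake spelling; e.g. an exp.If node becomes IFF
        # (duckdb is an arbitrary source dialect here; expected output shown):
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT IF(x > 1, 1, 0)", read="duckdb", write="snowflake")[0]
        #     'SELECT IFF(x > 1, 1, 0)'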

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"
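
        # Sketch of unnest_sql: an UNNEST coming from another dialect is rewritten
        # into Snowflake's TABLE(FLATTEN(...)) form with the canonical output
        # columns aliased (expected output shown, duckdb chosen arbitrarily):
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT * FROM UNNEST([1, 2])", read="duckdb", write="snowflake")[0]
        #     'SELECT * FROM TABLE(FLATTEN(INPUT => [1, 2])) AS _u(seq, key, path, index, value, this)'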

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT ALL is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT ALL is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            if expression.args.get("weight") or expression.args.get("accuracy"):
                self.unsupported(
                    "APPROX_PERCENTILE with weight and/or accuracy arguments is not supported in Snowflake"
                )

            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
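
        # struct_sql sketch: struct literals from other dialects become
        # OBJECT_CONSTRUCT calls with string keys (bigquery is an arbitrary source
        # dialect here; expected output shown):
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT STRUCT(1 AS a)", read="bigquery", write="snowflake")[0]
        #     "SELECT OBJECT_CONSTRUCT('a', 1)"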

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"
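
# End-to-end, subclassing Dialect registers this dialect under the name
# "snowflake"; a minimal usage sketch (expected output shown, subject to minor
# version differences):
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT TOP 5 a FROM t", read="snowflake", write="postgres")[0]
#     'SELECT a FROM t LIMIT 5'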
in enumerate(expression.expressions): 1087 if isinstance(e, exp.PropertyEQ): 1088 keys.append( 1089 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1090 ) 1091 values.append(e.expression) 1092 else: 1093 keys.append(exp.Literal.string(f"_{i}")) 1094 values.append(e) 1095 1096 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1097 1098 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1099 if expression.args.get("weight") or expression.args.get("accuracy"): 1100 self.unsupported( 1101 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1102 ) 1103 1104 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1105 1106 def alterset_sql(self, expression: exp.AlterSet) -> str: 1107 exprs = self.expressions(expression, flat=True) 1108 exprs = f" {exprs}" if exprs else "" 1109 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1110 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1111 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1112 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1113 tag = self.expressions(expression, key="tag", flat=True) 1114 tag = f" TAG {tag}" if tag else "" 1115 1116 return f"SET{exprs}{file_format}{copy_options}{tag}"
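Taken together, the overrides above are easiest to sanity-check end to end via transpilation. A minimal sketch (the function calls are real sqlglot API; the expected outputs are what the code above implies and may vary slightly across versions):

import sqlglot

# REGEXP_SUBSTR defaults are filled in per regexpextract_sql above: once a group
# is present, position, occurrence and parameters are generated as well.
print(sqlglot.transpile("SELECT REGEXP_EXTRACT(col, 'a(b)', 1)", read="duckdb", write="snowflake")[0])
# Expected: SELECT REGEXP_SUBSTR(col, 'a(b)', 1, 1, 'c', 1)

# UNNEST is rendered as a FLATTEN table function per unnest_sql above.
print(sqlglot.transpile("SELECT value FROM UNNEST([1, 2]) AS t(value)", read="duckdb", write="snowflake")[0])
# Expected (roughly): SELECT value FROM TABLE(FLATTEN(INPUT => [1, 2])) AS t(seq, key, path, index, value, this)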
NORMALIZATION_STRATEGY
Specifies the strategy according to which identifiers should be normalized.
NULL_ORDERING
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
PREFER_CTE_ALIAS_COLUMN
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.

For example,

    WITH y(c) AS (
        SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    ) SELECT c FROM y;

will be rewritten as

    WITH y(c) AS (
        SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    ) SELECT c FROM y;
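One way to observe this flag in action, sketched under the assumption that it is consulted during column qualification in the optimizer:

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"

# With the Snowflake dialect, PREFER_CTE_ALIAS_COLUMN should push the CTE alias
# column `c` down onto the SUM(a) projection during qualification.
print(qualify(parse_one(sql, read="snowflake"), dialect="snowflake").sql("snowflake"))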
TIME_MAPPING
Associates this dialect's time formats with their equivalent Python strftime formats.
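For example, a Snowflake format string is rewritten through this mapping when transpiling to a strftime-based dialect (a sketch; the exact output string can vary by version):

import sqlglot

# 'YYYY-MM-DD' is Snowflake format syntax; DuckDB's STRPTIME expects strftime
print(sqlglot.transpile("SELECT TO_TIMESTAMP('2020-01-01', 'YYYY-MM-DD')", read="snowflake", write="duckdb")[0])
# Expected: SELECT STRPTIME('2020-01-01', '%Y-%m-%d')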
    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
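A small sketch of the DUAL carve-out (the printed results are what the code above implies):

from sqlglot import exp
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()

# An ordinary identifier gets quoted when identify=True (the default here)
print(dialect.quote_identifier(exp.to_identifier("col")).sql("snowflake"))  # "col"

# DUAL inside a table reference is intentionally left unquoted
table = exp.to_table("DUAL")
print(dialect.quote_identifier(table.this).sql("snowflake"))  # DUAL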
UNESCAPED_SEQUENCES
Mapping of an escaped sequence (e.g. "\\n") to its unescaped version (a literal newline).
class Parser(parser.Parser):
    ...  # full source shown in the Snowflake class listing above
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
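As a usage sketch, Snowflake's SHOW support round-trips through this parser:

import sqlglot

ast = sqlglot.parse_one("SHOW TERSE TABLES LIKE '%foo%' IN SCHEMA db.s", read="snowflake")
print(type(ast).__name__)    # Show
print(ast.sql("snowflake"))  # SHOW TERSE TABLES LIKE '%foo%' IN SCHEMA db.s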
class Tokenizer(tokens.Tokenizer):
    ...  # full source shown in the Snowflake class listing above
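A sketch of the Snowflake-specific lexing configured above (raw $$ strings, // line comments):

from sqlglot.dialects.snowflake import Snowflake

# $$...$$ lexes as a single raw string token; // starts a line comment,
# which is attached to the neighboring token rather than emitted on its own
for token in Snowflake().tokenize("SELECT $$a 'b' c$$ // trailing comment"):
    print(token.token_type, repr(token.text))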
class Generator(generator.Generator):
    ...  # full source shown in the Snowflake class listing above
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
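A usage sketch showing a couple of the Snowflake-specific renderings configured above:

import sqlglot

# IF is rendered as IFF, per the exp.If transform
print(sqlglot.transpile("SELECT IF(x > 0, 1, NULL)", write="snowflake")[0])
# Expected: SELECT IFF(x > 0, 1, NULL)

# SELECT * EXCEPT uses Snowflake's EXCLUDE keyword, per STAR_EXCEPT
print(sqlglot.transpile("SELECT * EXCEPT (y) FROM t", read="bigquery", write="snowflake")[0])
# Expected: SELECT * EXCLUDE (y) FROM t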