sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
)
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        int_value = value is not None and is_int(value.name)

        if isinstance(value, exp.Literal):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return exp.cast(value, kind)

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if int_value:
                    return exp.UnixToTime(this=value, scale=seq_get(args, 1))
                if not is_float(value.this):
                    return build_formatted_time(exp.StrToTime, "snowflake")(args)

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))
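
# Note on the builders above (editorial comment, illustrative rather than exhaustive):
# with these in place, TO_TIMESTAMP('2024-01-01', 'yyyy-mm-dd') parses to exp.StrToTime,
# TO_TIMESTAMP(0, 3) to exp.UnixToTime with a scale, TO_TIME('01:02:03') to a plain
# CAST('01:02:03' AS TIME), and DIV0 / ZEROIFNULL / NULLIFZERO all normalize to
# dialect-neutral exp.If nodes that other generators can render as CASE expressions.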


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
222 "ss": "%S", 223 "FF": "%f", 224 "ff": "%f", 225 "FF6": "%f", 226 "ff6": "%f", 227 } 228 229 def quote_identifier(self, expression: E, identify: bool = True) -> E: 230 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 231 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 232 if ( 233 isinstance(expression, exp.Identifier) 234 and isinstance(expression.parent, exp.Table) 235 and expression.name.lower() == "dual" 236 ): 237 return expression # type: ignore 238 239 return super().quote_identifier(expression, identify=identify) 240 241 class Parser(parser.Parser): 242 IDENTIFY_PIVOT_STRINGS = True 243 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 244 COLON_IS_VARIANT_EXTRACT = True 245 246 ID_VAR_TOKENS = { 247 *parser.Parser.ID_VAR_TOKENS, 248 TokenType.MATCH_CONDITION, 249 } 250 251 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 252 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 253 254 FUNCTIONS = { 255 **parser.Parser.FUNCTIONS, 256 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 257 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 258 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 259 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 260 this=seq_get(args, 1), expression=seq_get(args, 0) 261 ), 262 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 263 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 264 start=seq_get(args, 0), 265 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 266 step=seq_get(args, 2), 267 ), 268 "BITXOR": binary_from_function(exp.BitwiseXor), 269 "BIT_XOR": binary_from_function(exp.BitwiseXor), 270 "BOOLXOR": binary_from_function(exp.Xor), 271 "CONVERT_TIMEZONE": _build_convert_timezone, 272 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 273 "DATE_TRUNC": _date_trunc_to_time, 274 "DATEADD": _build_date_time_add(exp.DateAdd), 275 "DATEDIFF": _build_datediff, 276 "DIV0": _build_if_from_div0, 277 "FLATTEN": exp.Explode.from_arg_list, 278 "GET_PATH": lambda args, dialect: exp.JSONExtract( 279 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 280 ), 281 "IFF": exp.If.from_arg_list, 282 "LAST_DAY": lambda args: exp.LastDay( 283 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 284 ), 285 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 286 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 287 "LISTAGG": exp.GroupConcat.from_arg_list, 288 "MEDIAN": lambda args: exp.PercentileCont( 289 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 290 ), 291 "NULLIFZERO": _build_if_from_nullifzero, 292 "OBJECT_CONSTRUCT": _build_object_construct, 293 "REGEXP_REPLACE": _build_regexp_replace, 294 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 295 "RLIKE": exp.RegexpLike.from_arg_list, 296 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 297 "TIMEADD": _build_date_time_add(exp.TimeAdd), 298 "TIMEDIFF": _build_datediff, 299 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 300 "TIMESTAMPDIFF": _build_datediff, 301 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 302 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 303 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 304 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 305 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 306 "TO_NUMBER": lambda args: exp.ToNumber( 307 this=seq_get(args, 
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self.expression(
                    exp.TagColumnConstraint,
                    expressions=self._parse_wrapped_csv(self._parse_property),
                )

            return None

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            historical_data = None
            if self._match_texts(self.HISTORICAL_DATA_PREFIX):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    historical_data = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                else:
                    self._retreat(index)

            return historical_data

        def _parse_changes(self) -> t.Optional[exp.Changes]:
            if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
                return None

            information = self._parse_var(any_token=True)
            self._match_r_paren()

            return self.expression(
                exp.Changes,
                information=information,
                at_before=self._parse_historical_data(),
                end=self._parse_historical_data(),
            )

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            changes = self._parse_changes()
            if changes:
                table.set("changes", changes)

            at_before = self._parse_historical_data()
            if at_before:
                table.set("when", at_before)

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS,
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
            elif self._curr:
                scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")
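
            # Editorial note: Snowflake's FLATTEN exposes the six columns SEQ, KEY, PATH,
            # INDEX, VALUE and THIS; the alias built below pins the UNNEST output to that
            # column order, substituting the caller's offset/value aliases where given.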
expression.args.get("offset") 919 920 columns = [ 921 exp.to_identifier("seq"), 922 exp.to_identifier("key"), 923 exp.to_identifier("path"), 924 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 925 seq_get(unnest_alias.columns if unnest_alias else [], 0) 926 or exp.to_identifier("value"), 927 exp.to_identifier("this"), 928 ] 929 930 if unnest_alias: 931 unnest_alias.set("columns", columns) 932 else: 933 unnest_alias = exp.TableAlias(this="_u", columns=columns) 934 935 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 936 alias = self.sql(unnest_alias) 937 alias = f" AS {alias}" if alias else "" 938 return f"{explode}{alias}" 939 940 def show_sql(self, expression: exp.Show) -> str: 941 terse = "TERSE " if expression.args.get("terse") else "" 942 history = " HISTORY" if expression.args.get("history") else "" 943 like = self.sql(expression, "like") 944 like = f" LIKE {like}" if like else "" 945 946 scope = self.sql(expression, "scope") 947 scope = f" {scope}" if scope else "" 948 949 scope_kind = self.sql(expression, "scope_kind") 950 if scope_kind: 951 scope_kind = f" IN {scope_kind}" 952 953 starts_with = self.sql(expression, "starts_with") 954 if starts_with: 955 starts_with = f" STARTS WITH {starts_with}" 956 957 limit = self.sql(expression, "limit") 958 959 from_ = self.sql(expression, "from") 960 if from_: 961 from_ = f" FROM {from_}" 962 963 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 964 965 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 966 # Other dialects don't support all of the following parameters, so we need to 967 # generate default values as necessary to ensure the transpilation is correct 968 group = expression.args.get("group") 969 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 970 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 971 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 972 973 return self.func( 974 "REGEXP_SUBSTR", 975 expression.this, 976 expression.expression, 977 position, 978 occurrence, 979 parameters, 980 group, 981 ) 982 983 def except_op(self, expression: exp.Except) -> str: 984 if not expression.args.get("distinct"): 985 self.unsupported("EXCEPT with All is not supported in Snowflake") 986 return super().except_op(expression) 987 988 def intersect_op(self, expression: exp.Intersect) -> str: 989 if not expression.args.get("distinct"): 990 self.unsupported("INTERSECT with All is not supported in Snowflake") 991 return super().intersect_op(expression) 992 993 def describe_sql(self, expression: exp.Describe) -> str: 994 # Default to table if kind is unknown 995 kind_value = expression.args.get("kind") or "TABLE" 996 kind = f" {kind_value}" if kind_value else "" 997 this = f" {self.sql(expression, 'this')}" 998 expressions = self.expressions(expression, flat=True) 999 expressions = f" {expressions}" if expressions else "" 1000 return f"DESCRIBE{kind}{this}{expressions}" 1001 1002 def generatedasidentitycolumnconstraint_sql( 1003 self, expression: exp.GeneratedAsIdentityColumnConstraint 1004 ) -> str: 1005 start = expression.args.get("start") 1006 start = f" START {start}" if start else "" 1007 increment = expression.args.get("increment") 1008 increment = f" INCREMENT {increment}" if increment else "" 1009 return f"AUTOINCREMENT{start}{increment}" 1010 1011 def swaptable_sql(self, expression: exp.SwapTable) -> 
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            if expression.args.get("weight") or expression.args.get("accuracy"):
                self.unsupported(
                    "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake"
                )

            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"
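

For orientation, here is a quick round trip through the dialect. This sketch is not part
of the module source, and exact output may differ slightly across sqlglot versions:
reading with read="snowflake" exercises Snowflake.Parser (DIV0 and IFF both normalize to
portable exp.If nodes), while writing with another dialect uses that dialect's generator
instead of the TRANSFORMS table above.

    >>> import sqlglot
    >>> sqlglot.transpile("SELECT DIV0(a, b) FROM t", read="snowflake", write="duckdb")[0]
    'SELECT CASE WHEN b = 0 THEN 0 ELSE a / b END FROM t'
    >>> sqlglot.transpile("SELECT IFF(x > 0, 1, 0)", read="snowflake", write="snowflake")[0]
    'SELECT IFF(x > 0, 1, 0)'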
896 897 def trycast_sql(self, expression: exp.TryCast) -> str: 898 value = expression.this 899 900 if value.type is None: 901 from sqlglot.optimizer.annotate_types import annotate_types 902 903 value = annotate_types(value) 904 905 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 906 return super().trycast_sql(expression) 907 908 # TRY_CAST only works for string values in Snowflake 909 return self.cast_sql(expression) 910 911 def log_sql(self, expression: exp.Log) -> str: 912 if not expression.expression: 913 return self.func("LN", expression.this) 914 915 return super().log_sql(expression) 916 917 def unnest_sql(self, expression: exp.Unnest) -> str: 918 unnest_alias = expression.args.get("alias") 919 offset = expression.args.get("offset") 920 921 columns = [ 922 exp.to_identifier("seq"), 923 exp.to_identifier("key"), 924 exp.to_identifier("path"), 925 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 926 seq_get(unnest_alias.columns if unnest_alias else [], 0) 927 or exp.to_identifier("value"), 928 exp.to_identifier("this"), 929 ] 930 931 if unnest_alias: 932 unnest_alias.set("columns", columns) 933 else: 934 unnest_alias = exp.TableAlias(this="_u", columns=columns) 935 936 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 937 alias = self.sql(unnest_alias) 938 alias = f" AS {alias}" if alias else "" 939 return f"{explode}{alias}" 940 941 def show_sql(self, expression: exp.Show) -> str: 942 terse = "TERSE " if expression.args.get("terse") else "" 943 history = " HISTORY" if expression.args.get("history") else "" 944 like = self.sql(expression, "like") 945 like = f" LIKE {like}" if like else "" 946 947 scope = self.sql(expression, "scope") 948 scope = f" {scope}" if scope else "" 949 950 scope_kind = self.sql(expression, "scope_kind") 951 if scope_kind: 952 scope_kind = f" IN {scope_kind}" 953 954 starts_with = self.sql(expression, "starts_with") 955 if starts_with: 956 starts_with = f" STARTS WITH {starts_with}" 957 958 limit = self.sql(expression, "limit") 959 960 from_ = self.sql(expression, "from") 961 if from_: 962 from_ = f" FROM {from_}" 963 964 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 965 966 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 967 # Other dialects don't support all of the following parameters, so we need to 968 # generate default values as necessary to ensure the transpilation is correct 969 group = expression.args.get("group") 970 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 971 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 972 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 973 974 return self.func( 975 "REGEXP_SUBSTR", 976 expression.this, 977 expression.expression, 978 position, 979 occurrence, 980 parameters, 981 group, 982 ) 983 984 def except_op(self, expression: exp.Except) -> str: 985 if not expression.args.get("distinct"): 986 self.unsupported("EXCEPT with All is not supported in Snowflake") 987 return super().except_op(expression) 988 989 def intersect_op(self, expression: exp.Intersect) -> str: 990 if not expression.args.get("distinct"): 991 self.unsupported("INTERSECT with All is not supported in Snowflake") 992 return super().intersect_op(expression) 993 994 def describe_sql(self, expression: exp.Describe) -> str: 995 # Default to table if kind is unknown 996 kind_value = 
expression.args.get("kind") or "TABLE" 997 kind = f" {kind_value}" if kind_value else "" 998 this = f" {self.sql(expression, 'this')}" 999 expressions = self.expressions(expression, flat=True) 1000 expressions = f" {expressions}" if expressions else "" 1001 return f"DESCRIBE{kind}{this}{expressions}" 1002 1003 def generatedasidentitycolumnconstraint_sql( 1004 self, expression: exp.GeneratedAsIdentityColumnConstraint 1005 ) -> str: 1006 start = expression.args.get("start") 1007 start = f" START {start}" if start else "" 1008 increment = expression.args.get("increment") 1009 increment = f" INCREMENT {increment}" if increment else "" 1010 return f"AUTOINCREMENT{start}{increment}" 1011 1012 def swaptable_sql(self, expression: exp.SwapTable) -> str: 1013 this = self.sql(expression, "this") 1014 return f"SWAP WITH {this}" 1015 1016 def cluster_sql(self, expression: exp.Cluster) -> str: 1017 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1018 1019 def struct_sql(self, expression: exp.Struct) -> str: 1020 keys = [] 1021 values = [] 1022 1023 for i, e in enumerate(expression.expressions): 1024 if isinstance(e, exp.PropertyEQ): 1025 keys.append( 1026 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1027 ) 1028 values.append(e.expression) 1029 else: 1030 keys.append(exp.Literal.string(f"_{i}")) 1031 values.append(e) 1032 1033 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1034 1035 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1036 if expression.args.get("weight") or expression.args.get("accuracy"): 1037 self.unsupported( 1038 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1039 ) 1040 1041 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1042 1043 def alterset_sql(self, expression: exp.AlterSet) -> str: 1044 exprs = self.expressions(expression, flat=True) 1045 exprs = f" {exprs}" if exprs else "" 1046 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1047 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1048 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1049 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1050 tag = self.expressions(expression, key="tag", flat=True) 1051 tag = f" TAG {tag}" if tag else "" 1052 1053 return f"SET{exprs}{file_format}{copy_options}{tag}"
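The show_sql generator method and the SHOW parser hooks shown above give Snowflake's SHOW statements a full parse/generate round-trip. A minimal sketch, assuming a recent sqlglot install (db1.schema1 is a hypothetical scope; the exact output shape is illustrative):

import sqlglot

# SHOW TERSE TABLES ... round-trips through the dialect; the scope kind
# (SCHEMA) is inferred during parsing because TABLES is one of the
# Parser's SCHEMA_KINDS.
sql = "SHOW TERSE TABLES LIKE '%ORDERS%' IN db1.schema1"
print(sqlglot.transpile(sql, read="snowflake", write="snowflake")[0])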
NORMALIZATION_STRATEGY
Specifies the strategy according to which identifiers should be normalized.
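For Snowflake this resolves unquoted identifiers to uppercase. A minimal sketch (assuming a recent sqlglot):

import sqlglot
from sqlglot.dialects.snowflake import Snowflake

# Unquoted identifiers are normalized to uppercase; quoted ones are kept as-is.
ident = sqlglot.exp.to_identifier("foo")
print(Snowflake().normalize_identifier(ident).sql(dialect="snowflake"))  # FOO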
NULL_ORDERING
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
PREFER_CTE_ALIAS_COLUMN
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery. For example,
WITH y(c) AS (
SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
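A minimal sketch of triggering this rewrite via the optimizer's qualify step (assuming a recent sqlglot; identifier quoting and casing in the output may differ):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
expression = sqlglot.parse_one(sql, read="snowflake")
# With PREFER_CTE_ALIAS_COLUMN set, qualification aliases SUM(a) as c, so
# the reference in the HAVING clause stays valid.
print(qualify(expression, dialect="snowflake").sql(dialect="snowflake"))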
TIME_MAPPING
Associates this dialect's time formats with their equivalent Python strftime formats.
230 def quote_identifier(self, expression: E, identify: bool = True) -> E: 231 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 232 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 233 if ( 234 isinstance(expression, exp.Identifier) 235 and isinstance(expression.parent, exp.Table) 236 and expression.name.lower() == "dual" 237 ): 238 return expression # type: ignore 239 240 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
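A minimal sketch of the DUAL carve-out described in the source above (illustrative output):

import sqlglot

# identify=True quotes every identifier except DUAL in a table position,
# which Snowflake treats as a keyword rather than a user-defined table.
e = sqlglot.parse_one("SELECT a FROM DUAL", read="snowflake")
print(e.sql(dialect="snowflake", identify=True))  # e.g. SELECT "a" FROM DUAL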
UNESCAPED_SEQUENCES
Mapping of an escaped sequence (e.g. the two characters "\n") to its unescaped version (an actual newline).
242 class Parser(parser.Parser): 243 IDENTIFY_PIVOT_STRINGS = True 244 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 245 COLON_IS_VARIANT_EXTRACT = True 246 247 ID_VAR_TOKENS = { 248 *parser.Parser.ID_VAR_TOKENS, 249 TokenType.MATCH_CONDITION, 250 } 251 252 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 253 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 254 255 FUNCTIONS = { 256 **parser.Parser.FUNCTIONS, 257 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 258 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 259 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 260 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 261 this=seq_get(args, 1), expression=seq_get(args, 0) 262 ), 263 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 264 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 265 start=seq_get(args, 0), 266 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 267 step=seq_get(args, 2), 268 ), 269 "BITXOR": binary_from_function(exp.BitwiseXor), 270 "BIT_XOR": binary_from_function(exp.BitwiseXor), 271 "BOOLXOR": binary_from_function(exp.Xor), 272 "CONVERT_TIMEZONE": _build_convert_timezone, 273 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 274 "DATE_TRUNC": _date_trunc_to_time, 275 "DATEADD": _build_date_time_add(exp.DateAdd), 276 "DATEDIFF": _build_datediff, 277 "DIV0": _build_if_from_div0, 278 "FLATTEN": exp.Explode.from_arg_list, 279 "GET_PATH": lambda args, dialect: exp.JSONExtract( 280 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 281 ), 282 "IFF": exp.If.from_arg_list, 283 "LAST_DAY": lambda args: exp.LastDay( 284 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 285 ), 286 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 287 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 288 "LISTAGG": exp.GroupConcat.from_arg_list, 289 "MEDIAN": lambda args: exp.PercentileCont( 290 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 291 ), 292 "NULLIFZERO": _build_if_from_nullifzero, 293 "OBJECT_CONSTRUCT": _build_object_construct, 294 "REGEXP_REPLACE": _build_regexp_replace, 295 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 296 "RLIKE": exp.RegexpLike.from_arg_list, 297 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 298 "TIMEADD": _build_date_time_add(exp.TimeAdd), 299 "TIMEDIFF": _build_datediff, 300 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 301 "TIMESTAMPDIFF": _build_datediff, 302 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 303 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 304 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 305 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 306 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 307 "TO_NUMBER": lambda args: exp.ToNumber( 308 this=seq_get(args, 0), 309 format=seq_get(args, 1), 310 precision=seq_get(args, 2), 311 scale=seq_get(args, 3), 312 ), 313 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 314 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 315 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 316 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 317 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 318 "TO_VARCHAR": exp.ToChar.from_arg_list, 319 "ZEROIFNULL": 
_build_if_from_zeroifnull, 320 } 321 322 FUNCTION_PARSERS = { 323 **parser.Parser.FUNCTION_PARSERS, 324 "DATE_PART": lambda self: self._parse_date_part(), 325 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 326 } 327 FUNCTION_PARSERS.pop("TRIM") 328 329 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 330 331 RANGE_PARSERS = { 332 **parser.Parser.RANGE_PARSERS, 333 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 334 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 335 } 336 337 ALTER_PARSERS = { 338 **parser.Parser.ALTER_PARSERS, 339 "UNSET": lambda self: self.expression( 340 exp.Set, 341 tag=self._match_text_seq("TAG"), 342 expressions=self._parse_csv(self._parse_id_var), 343 unset=True, 344 ), 345 "SWAP": lambda self: self._parse_alter_table_swap(), 346 } 347 348 STATEMENT_PARSERS = { 349 **parser.Parser.STATEMENT_PARSERS, 350 TokenType.SHOW: lambda self: self._parse_show(), 351 } 352 353 PROPERTY_PARSERS = { 354 **parser.Parser.PROPERTY_PARSERS, 355 "LOCATION": lambda self: self._parse_location_property(), 356 } 357 358 TYPE_CONVERTERS = { 359 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 360 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 361 } 362 363 SHOW_PARSERS = { 364 "SCHEMAS": _show_parser("SCHEMAS"), 365 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 366 "OBJECTS": _show_parser("OBJECTS"), 367 "TERSE OBJECTS": _show_parser("OBJECTS"), 368 "TABLES": _show_parser("TABLES"), 369 "TERSE TABLES": _show_parser("TABLES"), 370 "VIEWS": _show_parser("VIEWS"), 371 "TERSE VIEWS": _show_parser("VIEWS"), 372 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 373 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 374 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 375 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 376 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 377 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 378 "SEQUENCES": _show_parser("SEQUENCES"), 379 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 380 "COLUMNS": _show_parser("COLUMNS"), 381 "USERS": _show_parser("USERS"), 382 "TERSE USERS": _show_parser("USERS"), 383 } 384 385 CONSTRAINT_PARSERS = { 386 **parser.Parser.CONSTRAINT_PARSERS, 387 "WITH": lambda self: self._parse_with_constraint(), 388 "MASKING": lambda self: self._parse_with_constraint(), 389 "PROJECTION": lambda self: self._parse_with_constraint(), 390 "TAG": lambda self: self._parse_with_constraint(), 391 } 392 393 STAGED_FILE_SINGLE_TOKENS = { 394 TokenType.DOT, 395 TokenType.MOD, 396 TokenType.SLASH, 397 } 398 399 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 400 401 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 402 403 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 404 405 LAMBDAS = { 406 **parser.Parser.LAMBDAS, 407 TokenType.ARROW: lambda self, expressions: self.expression( 408 exp.Lambda, 409 this=self._replace_lambda( 410 self._parse_assignment(), 411 expressions, 412 ), 413 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 414 ), 415 } 416 417 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 418 if self._prev.token_type != TokenType.WITH: 419 self._retreat(self._index - 1) 420 421 if self._match_text_seq("MASKING", "POLICY"): 422 policy = self._parse_column() 423 return self.expression( 424 exp.MaskingPolicyColumnConstraint, 425 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 426 
expressions=self._match(TokenType.USING) 427 and self._parse_wrapped_csv(self._parse_id_var), 428 ) 429 if self._match_text_seq("PROJECTION", "POLICY"): 430 policy = self._parse_column() 431 return self.expression( 432 exp.ProjectionPolicyColumnConstraint, 433 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 434 ) 435 if self._match(TokenType.TAG): 436 return self.expression( 437 exp.TagColumnConstraint, 438 expressions=self._parse_wrapped_csv(self._parse_property), 439 ) 440 441 return None 442 443 def _parse_create(self) -> exp.Create | exp.Command: 444 expression = super()._parse_create() 445 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 446 # Replace the Table node with the enclosed Identifier 447 expression.this.replace(expression.this.this) 448 449 return expression 450 451 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 452 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 453 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 454 this = self._parse_var() or self._parse_type() 455 456 if not this: 457 return None 458 459 self._match(TokenType.COMMA) 460 expression = self._parse_bitwise() 461 this = map_date_part(this) 462 name = this.name.upper() 463 464 if name.startswith("EPOCH"): 465 if name == "EPOCH_MILLISECOND": 466 scale = 10**3 467 elif name == "EPOCH_MICROSECOND": 468 scale = 10**6 469 elif name == "EPOCH_NANOSECOND": 470 scale = 10**9 471 else: 472 scale = None 473 474 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 475 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 476 477 if scale: 478 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 479 480 return to_unix 481 482 return self.expression(exp.Extract, this=this, expression=expression) 483 484 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 485 if is_map: 486 # Keys are strings in Snowflake's objects, see also: 487 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 488 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 489 return self._parse_slice(self._parse_string()) 490 491 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 492 493 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 494 lateral = super()._parse_lateral() 495 if not lateral: 496 return lateral 497 498 if isinstance(lateral.this, exp.Explode): 499 table_alias = lateral.args.get("alias") 500 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 501 if table_alias and not table_alias.args.get("columns"): 502 table_alias.set("columns", columns) 503 elif not table_alias: 504 exp.alias_(lateral, "_flattened", table=columns, copy=False) 505 506 return lateral 507 508 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 509 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 510 index = self._index 511 historical_data = None 512 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 513 this = self._prev.text.upper() 514 kind = ( 515 self._match(TokenType.L_PAREN) 516 and self._match_texts(self.HISTORICAL_DATA_KIND) 517 and self._prev.text.upper() 518 ) 519 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 520 521 if expression: 522 self._match_r_paren() 523 historical_data = self.expression( 524 exp.HistoricalData, this=this, kind=kind, 
expression=expression 525 ) 526 else: 527 self._retreat(index) 528 529 return historical_data 530 531 def _parse_changes(self) -> t.Optional[exp.Changes]: 532 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 533 return None 534 535 information = self._parse_var(any_token=True) 536 self._match_r_paren() 537 538 return self.expression( 539 exp.Changes, 540 information=information, 541 at_before=self._parse_historical_data(), 542 end=self._parse_historical_data(), 543 ) 544 545 def _parse_table_parts( 546 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 547 ) -> exp.Table: 548 # https://docs.snowflake.com/en/user-guide/querying-stage 549 if self._match(TokenType.STRING, advance=False): 550 table = self._parse_string() 551 elif self._match_text_seq("@", advance=False): 552 table = self._parse_location_path() 553 else: 554 table = None 555 556 if table: 557 file_format = None 558 pattern = None 559 560 wrapped = self._match(TokenType.L_PAREN) 561 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 562 if self._match_text_seq("FILE_FORMAT", "=>"): 563 file_format = self._parse_string() or super()._parse_table_parts( 564 is_db_reference=is_db_reference 565 ) 566 elif self._match_text_seq("PATTERN", "=>"): 567 pattern = self._parse_string() 568 else: 569 break 570 571 self._match(TokenType.COMMA) 572 573 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 574 else: 575 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 576 577 changes = self._parse_changes() 578 if changes: 579 table.set("changes", changes) 580 581 at_before = self._parse_historical_data() 582 if at_before: 583 table.set("when", at_before) 584 585 return table 586 587 def _parse_id_var( 588 self, 589 any_token: bool = True, 590 tokens: t.Optional[t.Collection[TokenType]] = None, 591 ) -> t.Optional[exp.Expression]: 592 if self._match_text_seq("IDENTIFIER", "("): 593 identifier = ( 594 super()._parse_id_var(any_token=any_token, tokens=tokens) 595 or self._parse_string() 596 ) 597 self._match_r_paren() 598 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 599 600 return super()._parse_id_var(any_token=any_token, tokens=tokens) 601 602 def _parse_show_snowflake(self, this: str) -> exp.Show: 603 scope = None 604 scope_kind = None 605 606 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 607 # which is syntactically valid but has no effect on the output 608 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 609 610 history = self._match_text_seq("HISTORY") 611 612 like = self._parse_string() if self._match(TokenType.LIKE) else None 613 614 if self._match(TokenType.IN): 615 if self._match_text_seq("ACCOUNT"): 616 scope_kind = "ACCOUNT" 617 elif self._match_set(self.DB_CREATABLES): 618 scope_kind = self._prev.text.upper() 619 if self._curr: 620 scope = self._parse_table_parts() 621 elif self._curr: 622 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 623 scope = self._parse_table_parts() 624 625 return self.expression( 626 exp.Show, 627 **{ 628 "terse": terse, 629 "this": this, 630 "history": history, 631 "like": like, 632 "scope": scope, 633 "scope_kind": scope_kind, 634 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 635 "limit": self._parse_limit(), 636 "from": self._parse_string() if self._match(TokenType.FROM) else None, 637 }, 638 ) 639 640 def _parse_alter_table_swap(self) -> exp.SwapTable: 641 
self._match_text_seq("WITH") 642 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 643 644 def _parse_location_property(self) -> exp.LocationProperty: 645 self._match(TokenType.EQ) 646 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 647 648 def _parse_file_location(self) -> t.Optional[exp.Expression]: 649 # Parse either a subquery or a staged file 650 return ( 651 self._parse_select(table=True, parse_subquery_alias=False) 652 if self._match(TokenType.L_PAREN, advance=False) 653 else self._parse_table_parts() 654 ) 655 656 def _parse_location_path(self) -> exp.Var: 657 parts = [self._advance_any(ignore_reserved=True)] 658 659 # We avoid consuming a comma token because external tables like @foo and @bar 660 # can be joined in a query with a comma separator, as well as closing paren 661 # in case of subqueries 662 while self._is_connected() and not self._match_set( 663 (TokenType.COMMA, TokenType.R_PAREN), advance=False 664 ): 665 parts.append(self._advance_any(ignore_reserved=True)) 666 667 return exp.var("".join(part.text for part in parts if part)) 668 669 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 670 this = super()._parse_lambda_arg() 671 672 if not this: 673 return this 674 675 typ = self._parse_types() 676 677 if typ: 678 return self.expression(exp.Cast, this=this, to=typ) 679 680 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
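A minimal usage sketch (the query is hypothetical; IFF maps to the dialect-agnostic exp.If node via the FUNCTIONS table above):

import sqlglot
from sqlglot import exp

expression = sqlglot.parse_one("SELECT IFF(x > 0, 'p', 'n') FROM t", read="snowflake")
assert expression.find(exp.If) is not None
# Other dialects then render the node with their own conditional syntax,
# e.g. a CASE expression.
print(expression.sql(dialect="postgres"))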
682 class Tokenizer(tokens.Tokenizer): 683 STRING_ESCAPES = ["\\", "'"] 684 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 685 RAW_STRINGS = ["$$"] 686 COMMENTS = ["--", "//", ("/*", "*/")] 687 688 KEYWORDS = { 689 **tokens.Tokenizer.KEYWORDS, 690 "BYTEINT": TokenType.INT, 691 "CHAR VARYING": TokenType.VARCHAR, 692 "CHARACTER VARYING": TokenType.VARCHAR, 693 "EXCLUDE": TokenType.EXCEPT, 694 "ILIKE ANY": TokenType.ILIKE_ANY, 695 "LIKE ANY": TokenType.LIKE_ANY, 696 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 697 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 698 "MINUS": TokenType.EXCEPT, 699 "NCHAR VARYING": TokenType.VARCHAR, 700 "PUT": TokenType.COMMAND, 701 "REMOVE": TokenType.COMMAND, 702 "RM": TokenType.COMMAND, 703 "SAMPLE": TokenType.TABLE_SAMPLE, 704 "SQL_DOUBLE": TokenType.DOUBLE, 705 "SQL_VARCHAR": TokenType.VARCHAR, 706 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 707 "TAG": TokenType.TAG, 708 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 709 "TOP": TokenType.TOP, 710 "WAREHOUSE": TokenType.WAREHOUSE, 711 "STREAMLIT": TokenType.STREAMLIT, 712 } 713 KEYWORDS.pop("/*+") 714 715 SINGLE_TOKENS = { 716 **tokens.Tokenizer.SINGLE_TOKENS, 717 "$": TokenType.PARAMETER, 718 } 719 720 VAR_SINGLE_TOKENS = {"$"} 721 722 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
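A minimal lexing sketch of the Snowflake-specific settings above (assuming a recent sqlglot):

from sqlglot.dialects.snowflake import Snowflake

# '//' opens a single-line comment and $$...$$ delimits a raw string,
# per the COMMENTS and RAW_STRINGS settings above.
tokens = Snowflake().tokenize("SELECT $$a 'raw' string$$ // trailing comment")
print([token.token_type for token in tokens])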
class Snowflake.Generator(sqlglot.generator.Generator):
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
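A minimal generation sketch exercising one of the TRANSFORMS above (illustrative output):

import sqlglot

# The base dialect's IF(...) becomes Snowflake's IFF(...) via exp.If.
print(sqlglot.transpile("SELECT IF(x = 1, 'a', 'b') FROM t", write="snowflake")[0])
# e.g. SELECT IFF(x = 1, 'a', 'b') FROM t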