sqlglot.dialects.snowflake
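A minimal usage sketch (illustrative; it relies only on sqlglot's public transpile API, and the target dialect is an arbitrary choice):

    import sqlglot

    # Parse Snowflake SQL and re-render it for another dialect.
    print(sqlglot.transpile("SELECT IFF(x > 0, 1, 2)", read="snowflake", write="duckdb")[0])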
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
)
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        int_value = value is not None and is_int(value.name)

        if isinstance(value, exp.Literal):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return exp.cast(value, kind)

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if int_value:
                    return exp.UnixToTime(this=value, scale=seq_get(args, 1))
                if not is_float(value.this):
                    return build_formatted_time(exp.StrToTime, "snowflake")(args)

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))
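# Illustrative sketch of what the builders above produce (expression shapes, not
# exact generated SQL): DIV0(a, b) parses to IF(b = 0, 0, a / b), ZEROIFNULL(x)
# to IF(x IS NULL, 0, x), and NULLIFZERO(x) to IF(x = 0, NULL, x), so each can
# be transpiled to dialects that lack these functions.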
def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression
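# Illustrative example for _flatten_structured_types_unless_iceberg: a statement
# like CREATE TABLE t (c ARRAY(INT)) has its nested type parameters dropped
# (c ARRAY), unless the CREATE carries an ICEBERG property, in which case the
# parameterized type is preserved.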
def _unnest_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for unnest in expression.find_all(exp.Unnest):
            if (
                isinstance(unnest.parent, (exp.From, exp.Join))
                and len(unnest.expressions) == 1
                and isinstance(unnest.expressions[0], exp.GenerateDateArray)
            ):
                generate_date_array = unnest.expressions[0]
                start = generate_date_array.args.get("start")
                end = generate_date_array.args.get("end")
                step = generate_date_array.args.get("step")

                if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
                    continue

                unit = step.args.get("unit")

                unnest_alias = unnest.args.get("alias")
                if unnest_alias:
                    unnest_alias = unnest_alias.copy()
                    sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
                else:
                    sequence_value_name = "value"

                # We'll add the next sequence value to the starting date and project the result
                date_add = _build_date_time_add(exp.DateAdd)(
                    [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
                ).as_(sequence_value_name)

                # We use DATEDIFF to compute the number of sequence values needed
                number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
                    [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
                )

                unnest.set("expressions", [number_sequence])
                unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))

    return expression
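# Illustrative example for _unnest_generate_date_array: a projection over
# UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' DAY)) is rewritten into a
# subquery that unnests ARRAY_GENERATE_RANGE(0, DATEDIFF(day, start, end) + 1)
# and projects DATEADD(day, value, start) for each generated offset.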
class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }
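        # Illustrative example of the argument normalization above: Snowflake's
        # DATEDIFF(day, a, b) parses to exp.DateDiff(this=b, expression=a, unit=day),
        # i.e. the unit comes first in the SQL but is stored as a named arg on the
        # node, which lets dialects with a different argument order round-trip it.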
PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 407 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 408 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 409 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 410 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 411 "SEQUENCES": _show_parser("SEQUENCES"), 412 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 413 "COLUMNS": _show_parser("COLUMNS"), 414 "USERS": _show_parser("USERS"), 415 "TERSE USERS": _show_parser("USERS"), 416 } 417 418 CONSTRAINT_PARSERS = { 419 **parser.Parser.CONSTRAINT_PARSERS, 420 "WITH": lambda self: self._parse_with_constraint(), 421 "MASKING": lambda self: self._parse_with_constraint(), 422 "PROJECTION": lambda self: self._parse_with_constraint(), 423 "TAG": lambda self: self._parse_with_constraint(), 424 } 425 426 STAGED_FILE_SINGLE_TOKENS = { 427 TokenType.DOT, 428 TokenType.MOD, 429 TokenType.SLASH, 430 } 431 432 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 433 434 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 435 436 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 437 438 LAMBDAS = { 439 **parser.Parser.LAMBDAS, 440 TokenType.ARROW: lambda self, expressions: self.expression( 441 exp.Lambda, 442 this=self._replace_lambda( 443 self._parse_assignment(), 444 expressions, 445 ), 446 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 447 ), 448 } 449 450 def _negate_range( 451 self, this: t.Optional[exp.Expression] = None 452 ) -> t.Optional[exp.Expression]: 453 if not this: 454 return this 455 456 query = this.args.get("query") 457 if isinstance(this, exp.In) and isinstance(query, exp.Query): 458 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 459 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 460 # which can produce different results (most likely a SnowFlake bug). 
        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self.expression(
                    exp.TagColumnConstraint,
                    expressions=self._parse_wrapped_csv(self._parse_property),
                )

            return None

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral
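        # Illustrative example for _parse_date_part above: DATE_PART(epoch_millisecond, ts)
        # parses to a unix-seconds extraction over CAST(ts AS TIMESTAMP) multiplied
        # by 10**3; epoch_second maps to the plain extraction with no scaling.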
        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )
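        # Illustrative example for the staged-file parsing above: a FROM clause such as
        # @my_stage/path (FILE_FORMAT => 'my_format', PATTERN => '.*[.]csv') is parsed
        # into an exp.Table whose "format" and "pattern" args carry the two options.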
        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
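    # Tokenizer notes (illustrative): $$...$$ bodies are kept as raw strings,
    # // starts a line comment, and MINUS is tokenized as EXCEPT, so
    # SELECT 1 MINUS SELECT 2 parses like SELECT 1 EXCEPT SELECT 2.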
rename_func("DAYOFMONTH"), 771 exp.DayOfWeek: rename_func("DAYOFWEEK"), 772 exp.DayOfYear: rename_func("DAYOFYEAR"), 773 exp.Explode: rename_func("FLATTEN"), 774 exp.Extract: rename_func("DATE_PART"), 775 exp.FromTimeZone: lambda self, e: self.func( 776 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 777 ), 778 exp.GenerateSeries: lambda self, e: self.func( 779 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 780 ), 781 exp.GroupConcat: rename_func("LISTAGG"), 782 exp.If: if_sql(name="IFF", false_value="NULL"), 783 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 784 exp.JSONExtractScalar: lambda self, e: self.func( 785 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 786 ), 787 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 788 exp.JSONPathRoot: lambda *_: "", 789 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 790 exp.LogicalOr: rename_func("BOOLOR_AGG"), 791 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 792 exp.Max: max_or_greatest, 793 exp.Min: min_or_least, 794 exp.ParseJSON: lambda self, e: self.func( 795 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 796 ), 797 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 798 exp.PercentileCont: transforms.preprocess( 799 [transforms.add_within_group_for_percentiles] 800 ), 801 exp.PercentileDisc: transforms.preprocess( 802 [transforms.add_within_group_for_percentiles] 803 ), 804 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 805 exp.RegexpILike: _regexpilike_sql, 806 exp.Rand: rename_func("RANDOM"), 807 exp.Select: transforms.preprocess( 808 [ 809 transforms.eliminate_distinct_on, 810 transforms.explode_to_unnest(), 811 transforms.eliminate_semi_and_anti_joins, 812 _unnest_generate_date_array, 813 ] 814 ), 815 exp.SHA: rename_func("SHA1"), 816 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 817 exp.StartsWith: rename_func("STARTSWITH"), 818 exp.StrPosition: lambda self, e: self.func( 819 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 820 ), 821 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 822 exp.Stuff: rename_func("INSERT"), 823 exp.TimeAdd: date_delta_sql("TIMEADD"), 824 exp.TimestampDiff: lambda self, e: self.func( 825 "TIMESTAMPDIFF", e.unit, e.expression, e.this 826 ), 827 exp.TimestampTrunc: timestamptrunc_sql(), 828 exp.TimeStrToTime: timestrtotime_sql, 829 exp.TimeToStr: lambda self, e: self.func( 830 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 831 ), 832 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 833 exp.ToArray: rename_func("TO_ARRAY"), 834 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 835 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 836 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 837 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 838 exp.TsOrDsToDate: lambda self, e: self.func( 839 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 840 ), 841 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 842 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 843 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 844 exp.Xor: rename_func("BOOLXOR"), 845 } 846 847 SUPPORTED_JSON_PATH_PARTS = { 848 exp.JSONPathKey, 849 exp.JSONPathRoot, 850 exp.JSONPathSubscript, 851 } 852 853 TYPE_MAPPING = { 854 **generator.Generator.TYPE_MAPPING, 855 
        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"
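        # Illustrative output of unnest_sql above: UNNEST(x) is rendered as
        # TABLE(FLATTEN(INPUT => x)) AS _u(seq, key, path, index, value, this),
        # reusing any user-supplied alias column for the value position.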
= f" {scope}" if scope else "" 962 963 scope_kind = self.sql(expression, "scope_kind") 964 if scope_kind: 965 scope_kind = f" IN {scope_kind}" 966 967 starts_with = self.sql(expression, "starts_with") 968 if starts_with: 969 starts_with = f" STARTS WITH {starts_with}" 970 971 limit = self.sql(expression, "limit") 972 973 from_ = self.sql(expression, "from") 974 if from_: 975 from_ = f" FROM {from_}" 976 977 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 978 979 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 980 # Other dialects don't support all of the following parameters, so we need to 981 # generate default values as necessary to ensure the transpilation is correct 982 group = expression.args.get("group") 983 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 984 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 985 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 986 987 return self.func( 988 "REGEXP_SUBSTR", 989 expression.this, 990 expression.expression, 991 position, 992 occurrence, 993 parameters, 994 group, 995 ) 996 997 def except_op(self, expression: exp.Except) -> str: 998 if not expression.args.get("distinct"): 999 self.unsupported("EXCEPT with All is not supported in Snowflake") 1000 return super().except_op(expression) 1001 1002 def intersect_op(self, expression: exp.Intersect) -> str: 1003 if not expression.args.get("distinct"): 1004 self.unsupported("INTERSECT with All is not supported in Snowflake") 1005 return super().intersect_op(expression) 1006 1007 def describe_sql(self, expression: exp.Describe) -> str: 1008 # Default to table if kind is unknown 1009 kind_value = expression.args.get("kind") or "TABLE" 1010 kind = f" {kind_value}" if kind_value else "" 1011 this = f" {self.sql(expression, 'this')}" 1012 expressions = self.expressions(expression, flat=True) 1013 expressions = f" {expressions}" if expressions else "" 1014 return f"DESCRIBE{kind}{this}{expressions}" 1015 1016 def generatedasidentitycolumnconstraint_sql( 1017 self, expression: exp.GeneratedAsIdentityColumnConstraint 1018 ) -> str: 1019 start = expression.args.get("start") 1020 start = f" START {start}" if start else "" 1021 increment = expression.args.get("increment") 1022 increment = f" INCREMENT {increment}" if increment else "" 1023 return f"AUTOINCREMENT{start}{increment}" 1024 1025 def swaptable_sql(self, expression: exp.SwapTable) -> str: 1026 this = self.sql(expression, "this") 1027 return f"SWAP WITH {this}" 1028 1029 def cluster_sql(self, expression: exp.Cluster) -> str: 1030 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1031 1032 def struct_sql(self, expression: exp.Struct) -> str: 1033 keys = [] 1034 values = [] 1035 1036 for i, e in enumerate(expression.expressions): 1037 if isinstance(e, exp.PropertyEQ): 1038 keys.append( 1039 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1040 ) 1041 values.append(e.expression) 1042 else: 1043 keys.append(exp.Literal.string(f"_{i}")) 1044 values.append(e) 1045 1046 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1047 1048 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1049 if expression.args.get("weight") or expression.args.get("accuracy"): 1050 self.unsupported( 1051 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1052 ) 1053 1054 return 
self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1055 1056 def alterset_sql(self, expression: exp.AlterSet) -> str: 1057 exprs = self.expressions(expression, flat=True) 1058 exprs = f" {exprs}" if exprs else "" 1059 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1060 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1061 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1062 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1063 tag = self.expressions(expression, key="tag", flat=True) 1064 tag = f" TAG {tag}" if tag else "" 1065 1066 return f"SET{exprs}{file_format}{copy_options}{tag}"
1017 def generatedasidentitycolumnconstraint_sql( 1018 self, expression: exp.GeneratedAsIdentityColumnConstraint 1019 ) -> str: 1020 start = expression.args.get("start") 1021 start = f" START {start}" if start else "" 1022 increment = expression.args.get("increment") 1023 increment = f" INCREMENT {increment}" if increment else "" 1024 return f"AUTOINCREMENT{start}{increment}" 1025 1026 def swaptable_sql(self, expression: exp.SwapTable) -> str: 1027 this = self.sql(expression, "this") 1028 return f"SWAP WITH {this}" 1029 1030 def cluster_sql(self, expression: exp.Cluster) -> str: 1031 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1032 1033 def struct_sql(self, expression: exp.Struct) -> str: 1034 keys = [] 1035 values = [] 1036 1037 for i, e in enumerate(expression.expressions): 1038 if isinstance(e, exp.PropertyEQ): 1039 keys.append( 1040 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1041 ) 1042 values.append(e.expression) 1043 else: 1044 keys.append(exp.Literal.string(f"_{i}")) 1045 values.append(e) 1046 1047 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1048 1049 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1050 if expression.args.get("weight") or expression.args.get("accuracy"): 1051 self.unsupported( 1052 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1053 ) 1054 1055 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1056 1057 def alterset_sql(self, expression: exp.AlterSet) -> str: 1058 exprs = self.expressions(expression, flat=True) 1059 exprs = f" {exprs}" if exprs else "" 1060 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1061 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1062 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1063 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1064 tag = self.expressions(expression, key="tag", flat=True) 1065 tag = f" TAG {tag}" if tag else "" 1066 1067 return f"SET{exprs}{file_format}{copy_options}{tag}"
Specifies the strategy according to which identifiers should be normalized.
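For instance, Snowflake resolves unquoted identifiers to uppercase, so identifier normalization maps names accordingly. A minimal illustrative sketch (not part of this module) using sqlglot's public API:

from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Unquoted identifiers are normalized per the dialect's strategy; for
# Snowflake this should produce uppercase names.
ast = parse_one("SELECT my_col FROM my_table", read="snowflake")
print(normalize_identifiers(ast, dialect="snowflake").sql(dialect="snowflake"))
# Expected (roughly): SELECT MY_COL FROM MY_TABLE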
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, the query

WITH y(c) AS (
  SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
  SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Associates this dialect's time formats with their equivalent Python strftime formats.
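An illustrative sketch (not part of the module) of what this mapping enables: a Snowflake format string such as 'YYYY-MM-DD' is converted to strftime tokens when transpiling to another dialect.

import sqlglot

# TO_TIMESTAMP(<string>, <fmt>) parses into a StrToTime node whose format is
# translated through TIME_MAPPING, so DuckDB can render it with STRPTIME.
sql = "SELECT TO_TIMESTAMP('2024-01-02', 'YYYY-MM-DD')"
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
# Expected (roughly): SELECT STRPTIME('2024-01-02', '%Y-%m-%d')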
    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
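A short usage sketch (illustrative, not from the module): ordinary identifiers get quoted, while DUAL inside a table reference is returned untouched.

from sqlglot import exp, parse_one
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()

# A regular identifier is quoted when identify=True (the default).
print(dialect.quote_identifier(exp.to_identifier("my_col")).sql(dialect="snowflake"))
# Expected: "my_col"

# DUAL under a Table node is special-cased and left unquoted.
table = parse_one("SELECT 1 FROM dual", read="snowflake").find(exp.Table)
print(dialect.quote_identifier(table.this).sql(dialect="snowflake"))
# Expected: dual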
Mapping of an escaped sequence (e.g. "\\n") to its unescaped version ("\n").
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Parser(parser.Parser):
    ...  # full source shown in the module listing above
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
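For example (an illustrative sketch, not from the module), Snowflake-specific functions such as IFF are parsed into dialect-agnostic nodes:

import sqlglot
from sqlglot import exp

# IFF(...) maps to the generic exp.If node (see FUNCTIONS in the listing above).
ast = sqlglot.parse_one("SELECT IFF(x > 0, 'pos', 'neg')", read="snowflake")
print(type(ast.find(exp.If)).__name__)  # If
print(ast.sql(dialect="snowflake"))     # SELECT IFF(x > 0, 'pos', 'neg')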
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Tokenizer(tokens.Tokenizer):
    ...  # full source shown in the module listing above
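For example (illustrative sketch), the KEYWORDS remapping means MINUS tokenizes as EXCEPT, so it parses as a standard set operation:

import sqlglot

print(sqlglot.transpile("SELECT 1 MINUS SELECT 2", read="snowflake", write="postgres")[0])
# Expected: SELECT 1 EXCEPT SELECT 2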
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Generator(generator.Generator):
    ...  # full source shown in the module listing above
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
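An illustrative sketch (not part of the module) of dialect-specific generation, here relying on the STAR_EXCEPT = "EXCLUDE" setting shown in the listing above:

import sqlglot

# BigQuery's SELECT * EXCEPT (...) is rendered with Snowflake's EXCLUDE keyword.
print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="snowflake")[0])
# Expected: SELECT * EXCLUDE (a) FROM t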
def datatype_sql(self, expression: exp.DataType) -> str: ...
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: ...
def trycast_sql(self, expression: exp.TryCast) -> str: ...
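Usage sketch (illustrative): values that annotate to a non-text type are rewritten to a plain CAST, while unknown- or text-typed values keep TRY_CAST.

import sqlglot

print(sqlglot.transpile("SELECT TRY_CAST(1.5 AS INT)", read="snowflake", write="snowflake")[0])
# Expected: SELECT CAST(1.5 AS INT)

print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", read="snowflake", write="snowflake")[0])
# Expected: SELECT TRY_CAST(x AS INT)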
def unnest_sql(self, expression: exp.Unnest) -> str: ...
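Usage sketch (illustrative): an UNNEST from another dialect should be rendered as TABLE(FLATTEN(...)) with the synthetic column list built above.

import sqlglot

print(sqlglot.transpile("SELECT * FROM UNNEST([1, 2])", read="duckdb", write="snowflake")[0])
# Expected (roughly):
# SELECT * FROM TABLE(FLATTEN(INPUT => [1, 2])) AS _u(seq, key, path, index, value, this)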
def show_sql(self, expression: exp.Show) -> str: ...
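Usage sketch (illustrative): a parsed SHOW statement should round-trip through the exp.Show node.

import sqlglot

sql = "SHOW TERSE TABLES IN SCHEMA db1.s1 LIMIT 10"
print(sqlglot.transpile(sql, read="snowflake", write="snowflake")[0])
# Expected (roughly): SHOW TERSE TABLES IN SCHEMA db1.s1 LIMIT 10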
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: ...
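Usage sketch (illustrative): when the source dialect only supplies a group, the earlier arguments are filled with Snowflake's defaults so the positions line up.

import sqlglot

print(sqlglot.transpile("SELECT REGEXP_EXTRACT(s, 'a(b)', 1)", read="duckdb", write="snowflake")[0])
# Expected (roughly): SELECT REGEXP_SUBSTR(s, 'a(b)', 1, 1, 'c', 1)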
def describe_sql(self, expression: exp.Describe) -> str: ...
def generatedasidentitycolumnconstraint_sql(self, expression: exp.GeneratedAsIdentityColumnConstraint) -> str: ...
def struct_sql(self, expression: exp.Struct) -> str: ...
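Usage sketch (illustrative): struct literals from other dialects become OBJECT_CONSTRUCT key/value pairs.

import sqlglot

print(sqlglot.transpile("SELECT {'a': 1, 'b': 2}", read="duckdb", write="snowflake")[0])
# Expected: SELECT OBJECT_CONSTRUCT('a', 1, 'b', 2)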
def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: ...
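Usage sketch (illustrative):

import sqlglot

print(sqlglot.transpile("SELECT APPROX_QUANTILE(x, 0.5)", read="duckdb", write="snowflake")[0])
# Expected: SELECT APPROX_PERCENTILE(x, 0.5)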
def alterset_sql(self, expression: exp.AlterSet) -> str: ...
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql