# sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    """Return a builder that parses TO_DATE/TO_TIME/TO_TIMESTAMP-style calls."""

    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        int_value = value is not None and is_int(value.name)

        if isinstance(value, exp.Literal):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return exp.cast(value, kind)

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if int_value:
                    return exp.UnixToTime(this=value, scale=seq_get(args, 1))
                if not is_float(value.this):
                    return build_formatted_time(exp.StrToTime, "snowflake")(args)

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        # Fall back to an opaque function call when no transpilable shape matched
        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    """Parse OBJECT_CONSTRUCT(...) into a StarMap (for `*`) or a Struct."""
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    # Snowflake's argument order is (unit, start, end); sqlglot stores end as `this`
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    """Return a builder for DATEADD/TIMEADD/TIMESTAMPADD-style functions."""

    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=_map_date_part(seq_get(args, 0)),
        )

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    """Generate REGEXP_LIKE with an 'i' flag to express case-insensitive matching."""
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    # The 3-arg form (source_tz, target_tz, ts) has no generic equivalent, keep it opaque
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    # Snowflake defaults the replacement to the empty string; make that explicit
    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    """Bind arguments for a SHOW <kind> parser entry."""

    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


# Maps Snowflake's many date-part aliases onto their canonical names
DATE_PART_MAPPING = {
    "Y": "YEAR",
    "YY": "YEAR",
    "YYY": "YEAR",
    "YYYY": "YEAR",
    "YR": "YEAR",
    "YEARS": "YEAR",
    "YRS": "YEAR",
    "MM": "MONTH",
    "MON": "MONTH",
    "MONS": "MONTH",
    "MONTHS": "MONTH",
    "D": "DAY",
    "DD": "DAY",
    "DAYS": "DAY",
    "DAYOFMONTH": "DAY",
    "WEEKDAY": "DAYOFWEEK",
    "DOW": "DAYOFWEEK",
    "DW": "DAYOFWEEK",
    "WEEKDAY_ISO": "DAYOFWEEKISO",
    "DOW_ISO": "DAYOFWEEKISO",
    "DW_ISO": "DAYOFWEEKISO",
    "YEARDAY": "DAYOFYEAR",
    "DOY": "DAYOFYEAR",
    "DY": "DAYOFYEAR",
    "W": "WEEK",
    "WK": "WEEK",
    "WEEKOFYEAR": "WEEK",
    "WOY": "WEEK",
    "WY": "WEEK",
    "WEEK_ISO": "WEEKISO",
    "WEEKOFYEARISO": "WEEKISO",
    "WEEKOFYEAR_ISO": "WEEKISO",
    "Q": "QUARTER",
    "QTR": "QUARTER",
    "QTRS": "QUARTER",
    "QUARTERS": "QUARTER",
    "H": "HOUR",
    "HH": "HOUR",
    "HR": "HOUR",
    "HOURS": "HOUR",
    "HRS": "HOUR",
    "M": "MINUTE",
    "MI": "MINUTE",
    "MIN": "MINUTE",
    "MINUTES": "MINUTE",
    "MINS": "MINUTE",
    "S": "SECOND",
    "SEC": "SECOND",
    "SECONDS": "SECOND",
    "SECS": "SECOND",
    "MS": "MILLISECOND",
    "MSEC": "MILLISECOND",
    "MILLISECONDS": "MILLISECOND",
    "US": "MICROSECOND",
    "USEC": "MICROSECOND",
    "MICROSECONDS": "MICROSECOND",
    "NS": "NANOSECOND",
    "NSEC": "NANOSECOND",
    "NANOSEC": "NANOSECOND",
    "NSECOND": "NANOSECOND",
    "NSECONDS": "NANOSECOND",
    "NANOSECS": "NANOSECOND",
    "EPOCH": "EPOCH_SECOND",
    "EPOCH_SECONDS": "EPOCH_SECOND",
    "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
    "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
    "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
    "TZH": "TIMEZONE_HOUR",
    "TZM": "TIMEZONE_MINUTE",
}


@t.overload
def _map_date_part(part: exp.Expression) -> exp.Var:
    pass


@t.overload
def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    pass


def _map_date_part(part):
    """Canonicalize a date-part expression via DATE_PART_MAPPING (pass-through if unknown)."""
    mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None
    return exp.var(mapped) if mapped else part


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", _map_date_part(trunc.args["unit"]))
    return trunc


def _build_timestamp_from_parts(args: t.List) -> exp.Func:
    if len(args) == 2:
        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
        # so we parse this into Anonymous for now instead of introducing complexity
        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)

    return exp.TimestampFromParts.from_arg_list(args)


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    """Strip inner type parameters from nested types unless the table is Iceberg."""
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_JSON_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _build_convert_timezone,
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
        }

        TYPE_CONVERTER = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_conjunction(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind == "TAG":
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = _map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            # Snowflake cannot render semi-structured values inside VALUES(...); fall back
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                # Snowflake only takes nanoseconds; convert milliseconds on the way out
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"
990 991 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 992 993 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 994 # Other dialects don't support all of the following parameters, so we need to 995 # generate default values as necessary to ensure the transpilation is correct 996 group = expression.args.get("group") 997 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 998 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 999 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 1000 1001 return self.func( 1002 "REGEXP_SUBSTR", 1003 expression.this, 1004 expression.expression, 1005 position, 1006 occurrence, 1007 parameters, 1008 group, 1009 ) 1010 1011 def except_op(self, expression: exp.Except) -> str: 1012 if not expression.args.get("distinct"): 1013 self.unsupported("EXCEPT with All is not supported in Snowflake") 1014 return super().except_op(expression) 1015 1016 def intersect_op(self, expression: exp.Intersect) -> str: 1017 if not expression.args.get("distinct"): 1018 self.unsupported("INTERSECT with All is not supported in Snowflake") 1019 return super().intersect_op(expression) 1020 1021 def describe_sql(self, expression: exp.Describe) -> str: 1022 # Default to table if kind is unknown 1023 kind_value = expression.args.get("kind") or "TABLE" 1024 kind = f" {kind_value}" if kind_value else "" 1025 this = f" {self.sql(expression, 'this')}" 1026 expressions = self.expressions(expression, flat=True) 1027 expressions = f" {expressions}" if expressions else "" 1028 return f"DESCRIBE{kind}{this}{expressions}" 1029 1030 def generatedasidentitycolumnconstraint_sql( 1031 self, expression: exp.GeneratedAsIdentityColumnConstraint 1032 ) -> str: 1033 start = expression.args.get("start") 1034 start = f" START {start}" if start else "" 1035 increment = expression.args.get("increment") 1036 
increment = f" INCREMENT {increment}" if increment else "" 1037 return f"AUTOINCREMENT{start}{increment}" 1038 1039 def swaptable_sql(self, expression: exp.SwapTable) -> str: 1040 this = self.sql(expression, "this") 1041 return f"SWAP WITH {this}" 1042 1043 def cluster_sql(self, expression: exp.Cluster) -> str: 1044 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1045 1046 def struct_sql(self, expression: exp.Struct) -> str: 1047 keys = [] 1048 values = [] 1049 1050 for i, e in enumerate(expression.expressions): 1051 if isinstance(e, exp.PropertyEQ): 1052 keys.append( 1053 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1054 ) 1055 values.append(e.expression) 1056 else: 1057 keys.append(exp.Literal.string(f"_{i}")) 1058 values.append(e) 1059 1060 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1061 1062 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1063 if expression.args.get("weight") or expression.args.get("accuracy"): 1064 self.unsupported( 1065 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1066 ) 1067 1068 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1069 1070 def alterset_sql(self, expression: exp.AlterSet) -> str: 1071 exprs = self.expressions(expression, flat=True) 1072 exprs = f" {exprs}" if exprs else "" 1073 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1074 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1075 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1076 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1077 tag = self.expressions(expression, key="tag", flat=True) 1078 tag = f" TAG {tag}" if tag else "" 1079 1080 return f"SET{exprs}{file_format}{copy_options}{tag}"
class Snowflake(Dialect):
    """Dialect definition for Snowflake: tokenizer, parser and generator overrides."""

    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False

    # Snowflake format elements -> strftime-style tokens (both cases are accepted)
    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_JSON_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        # Snowflake-specific function names mapped onto sqlglot expression builders
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _build_convert_timezone,
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        # TRIM is parsed as a regular function in Snowflake
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
        }

        TYPE_CONVERTER = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        # SHOW subcommands; the TERSE variants map to the same parser
        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        # Single tokens that may appear inside a staged file path (@stage/...)
        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }
        # The fixed column schema produced by Snowflake's FLATTEN table function
        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_conjunction(),
                    expressions,
                ),
                # Lambda parameters may carry type casts; keep only the bare identifiers
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind == "TAG":
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = _map_date_part(this)
            name = this.name.upper()

            # EPOCH* parts are rewritten as a unix-time extraction, scaled to
            # the requested sub-second resolution
            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            # LATERAL FLATTEN(...) produces a fixed set of columns; attach them
            # to the alias when the user didn't spell them out
            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    # Not a complete AT/BEFORE clause — rewind and let other rules parse it
                    self._retreat(index)

            return table

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                # Optional (FILE_FORMAT => ..., PATTERN => ...) options after a staged file
                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            # IDENTIFIER(...) wraps a name or string so it can be used dynamically
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            # Lambda arguments may be typed, e.g. (x INT) -> ...
            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        # SHOW is parsed as a proper statement here, not tokenized as a command
        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"

        # Expression-level rewrites applied when generating Snowflake SQL
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            # Snowflake renders nested/structured types as semi-structured OBJECTs
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # Expression types that cannot appear inside a VALUES (...) table literal
        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            # Snowflake properties are emitted unwrapped and space-separated
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            # Fall back to SELECT-based generation when the VALUES clause contains
            # semi-structured expressions Snowflake can't accept there
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            # self.func drops trailing None arguments, so optional args are safe here
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                # TIMESTAMP_FROM_PARTS takes nanoseconds, not milliseconds
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)
        def log_sql(self, expression: exp.Log) -> str:
            # Single-argument LOG is natural log in this dialect's source expression
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            # FLATTEN exposes a fixed six-column schema; map the UNNEST alias /
            # offset onto the corresponding positions
            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            # Assemble the optional SHOW clauses; each fragment is empty when unset
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            # NOTE(review): kind_value is always truthy due to the "TABLE" fallback,
            # so the else branch below is unreachable
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            # NOTE(review): a literal 0 start/increment is falsy and would be dropped
            # by these truthiness checks — confirm 0 is not a legal value here
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            # Structs become OBJECT_CONSTRUCT(k1, v1, k2, v2, ...); positional
            # (unnamed) fields get synthetic "_<index>" keys
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            if expression.args.get("weight") or expression.args.get("accuracy"):
                self.unsupported(
                    "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake"
                )

            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Associates this dialect's time formats with their equivalent Python strftime
formats.
def quote_identifier(self, expression: E, identify: bool = True) -> E:
    """Quote an identifier, except for the special DUAL table name.

    Snowflake treats an unquoted DUAL in ``SELECT ... FROM DUAL`` as a special
    keyword rather than a user-defined table, so quoting it would change the
    query's meaning. All other identifiers defer to the base implementation.
    """
    is_dual_table = (
        isinstance(expression, exp.Identifier)
        and isinstance(expression.parent, exp.Table)
        and expression.name.lower() == "dual"
    )
    if is_dual_table:
        return expression  # type: ignore

    return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
- identify: If set to `False`, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
Mapping of an escaped sequence (e.g. `"\n"`) to its unescaped version (e.g. an actual newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Parser(parser.Parser):
    """Parser for Snowflake SQL, extending the base parser with Snowflake-specific
    functions, SHOW commands, staged-file references and FLATTEN handling."""

    IDENTIFY_PIVOT_STRINGS = True
    DEFAULT_SAMPLING_METHOD = "BERNOULLI"
    COLON_IS_JSON_EXTRACT = True

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.MATCH_CONDITION,
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
    TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
        "ARRAYAGG": exp.ArrayAgg.from_arg_list,
        "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
        "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
            this=seq_get(args, 1), expression=seq_get(args, 0)
        ),
        "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
            # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
            start=seq_get(args, 0),
            end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
            step=seq_get(args, 2),
        ),
        "BITXOR": binary_from_function(exp.BitwiseXor),
        "BIT_XOR": binary_from_function(exp.BitwiseXor),
        "BOOLXOR": binary_from_function(exp.Xor),
        "CONVERT_TIMEZONE": _build_convert_timezone,
        "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
        "DATE_TRUNC": _date_trunc_to_time,
        "DATEADD": _build_date_time_add(exp.DateAdd),
        "DATEDIFF": _build_datediff,
        "DIV0": _build_if_from_div0,
        "FLATTEN": exp.Explode.from_arg_list,
        "GET_PATH": lambda args, dialect: exp.JSONExtract(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        ),
        "IFF": exp.If.from_arg_list,
        "LAST_DAY": lambda args: exp.LastDay(
            this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
        ),
        "LISTAGG": exp.GroupConcat.from_arg_list,
        "MEDIAN": lambda args: exp.PercentileCont(
            this=seq_get(args, 0), expression=exp.Literal.number(0.5)
        ),
        "NULLIFZERO": _build_if_from_nullifzero,
        "OBJECT_CONSTRUCT": _build_object_construct,
        "REGEXP_REPLACE": _build_regexp_replace,
        "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
        "RLIKE": exp.RegexpLike.from_arg_list,
        "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
        "TIMEADD": _build_date_time_add(exp.TimeAdd),
        "TIMEDIFF": _build_datediff,
        "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
        "TIMESTAMPDIFF": _build_datediff,
        "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
        "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
        "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
        "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
        "TO_NUMBER": lambda args: exp.ToNumber(
            this=seq_get(args, 0),
            format=seq_get(args, 1),
            precision=seq_get(args, 2),
            scale=seq_get(args, 3),
        ),
        "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
        "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
        "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
        "TO_VARCHAR": exp.ToChar.from_arg_list,
        "ZEROIFNULL": _build_if_from_zeroifnull,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "DATE_PART": lambda self: self._parse_date_part(),
        "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
    }
    FUNCTION_PARSERS.pop("TRIM")

    TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
        TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "UNSET": lambda self: self.expression(
            exp.Set,
            tag=self._match_text_seq("TAG"),
            expressions=self._parse_csv(self._parse_id_var),
            unset=True,
        ),
        "SWAP": lambda self: self._parse_alter_table_swap(),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.SHOW: lambda self: self._parse_show(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "LOCATION": lambda self: self._parse_location_property(),
    }

    TYPE_CONVERTER = {
        # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
    }

    SHOW_PARSERS = {
        "SCHEMAS": _show_parser("SCHEMAS"),
        "TERSE SCHEMAS": _show_parser("SCHEMAS"),
        "OBJECTS": _show_parser("OBJECTS"),
        "TERSE OBJECTS": _show_parser("OBJECTS"),
        "TABLES": _show_parser("TABLES"),
        "TERSE TABLES": _show_parser("TABLES"),
        "VIEWS": _show_parser("VIEWS"),
        "TERSE VIEWS": _show_parser("VIEWS"),
        "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "SEQUENCES": _show_parser("SEQUENCES"),
        "TERSE SEQUENCES": _show_parser("SEQUENCES"),
        "COLUMNS": _show_parser("COLUMNS"),
        "USERS": _show_parser("USERS"),
        "TERSE USERS": _show_parser("USERS"),
    }

    STAGED_FILE_SINGLE_TOKENS = {
        TokenType.DOT,
        TokenType.MOD,
        TokenType.SLASH,
    }

    # The fixed set of columns produced by Snowflake's FLATTEN table function.
    FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

    SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

    LAMBDAS = {
        **parser.Parser.LAMBDAS,
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                expressions,
            ),
            # Lambda args may carry inline type casts; unwrap them to bare names.
            expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
        ),
    }

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse CREATE; for CREATE TAG, unwrap the Table node to its Identifier."""
        expression = super()._parse_create()
        if isinstance(expression, exp.Create) and expression.kind == "TAG":
            # Replace the Table node with the enclosed Identifier
            expression.this.replace(expression.this.this)

        return expression

    # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
    # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
    def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
        """Parse DATE_PART(part, expr); EPOCH* parts become unix-time expressions."""
        this = self._parse_var() or self._parse_type()

        if not this:
            return None

        self._match(TokenType.COMMA)
        expression = self._parse_bitwise()
        this = _map_date_part(this)
        name = this.name.upper()

        if name.startswith("EPOCH"):
            # Scale unix seconds up to the requested sub-second resolution.
            if name == "EPOCH_MILLISECOND":
                scale = 10**3
            elif name == "EPOCH_MICROSECOND":
                scale = 10**6
            elif name == "EPOCH_NANOSECOND":
                scale = 10**9
            else:
                scale = None

            ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
            to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

            if scale:
                to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

            return to_unix

        return self.expression(exp.Extract, this=this, expression=expression)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one key/value inside brackets; object keys must be strings."""
        if is_map:
            # Keys are strings in Snowflake's objects, see also:
            # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
            # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
            return self._parse_slice(self._parse_string())

        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL; FLATTEN gets its fixed column aliases filled in."""
        lateral = super()._parse_lateral()
        if not lateral:
            return lateral

        if isinstance(lateral.this, exp.Explode):
            table_alias = lateral.args.get("alias")
            columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
            if table_alias and not table_alias.args.get("columns"):
                table_alias.set("columns", columns)
            elif not table_alias:
                exp.alias_(lateral, "_flattened", table=columns, copy=False)

        return lateral

    def _parse_at_before(self, table: exp.Table) -> exp.Table:
        """Parse an optional AT/BEFORE time-travel clause attached to a table."""
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        if self._match_texts(("AT", "BEFORE")):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                when = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
                table.set("when", when)
            else:
                # Not a valid AT/BEFORE clause; rewind and let the caller retry.
                self._retreat(index)

        return table

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a table reference, including staged-file references like @stage/path."""
        # https://docs.snowflake.com/en/user-guide/querying-stage
        if self._match(TokenType.STRING, advance=False):
            table = self._parse_string()
        elif self._match_text_seq("@", advance=False):
            table = self._parse_location_path()
        else:
            table = None

        if table:
            file_format = None
            pattern = None

            # Optional (FILE_FORMAT => ..., PATTERN => ...) options after the stage.
            wrapped = self._match(TokenType.L_PAREN)
            while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILE_FORMAT", "=>"):
                    file_format = self._parse_string() or super()._parse_table_parts(
                        is_db_reference=is_db_reference
                    )
                elif self._match_text_seq("PATTERN", "=>"):
                    pattern = self._parse_string()
                else:
                    break

                self._match(TokenType.COMMA)

            table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
        else:
            table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

        return self._parse_at_before(table)

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, supporting Snowflake's IDENTIFIER(...) indirection."""
        if self._match_text_seq("IDENTIFIER", "("):
            identifier = (
                super()._parse_id_var(any_token=any_token, tokens=tokens)
                or self._parse_string()
            )
            self._match_r_paren()
            return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

        return super()._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_show_snowflake(self, this: str) -> exp.Show:
        """Parse the body of a SHOW command (modifiers, scope, LIKE, LIMIT, FROM)."""
        scope = None
        scope_kind = None

        # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
        # which is syntactically valid but has no effect on the output
        terse = self._tokens[self._index - 2].text.upper() == "TERSE"

        history = self._match_text_seq("HISTORY")

        like = self._parse_string() if self._match(TokenType.LIKE) else None

        if self._match(TokenType.IN):
            if self._match_text_seq("ACCOUNT"):
                scope_kind = "ACCOUNT"
            elif self._match_set(self.DB_CREATABLES):
                scope_kind = self._prev.text.upper()
                if self._curr:
                    scope = self._parse_table_parts()
            elif self._curr:
                scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                scope = self._parse_table_parts()

        return self.expression(
            exp.Show,
            **{
                "terse": terse,
                "this": this,
                "history": history,
                "like": like,
                "scope": scope,
                "scope_kind": scope_kind,
                "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                "limit": self._parse_limit(),
                "from": self._parse_string() if self._match(TokenType.FROM) else None,
            },
        )

    def _parse_alter_table_swap(self) -> exp.SwapTable:
        """Parse ALTER TABLE ... SWAP WITH <table>."""
        self._match_text_seq("WITH")
        return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

    def _parse_location_property(self) -> exp.LocationProperty:
        """Parse LOCATION [=] <stage path>."""
        self._match(TokenType.EQ)
        return self.expression(exp.LocationProperty, this=self._parse_location_path())

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        """Parse either a subquery or a staged file as a COPY source/target."""
        return (
            self._parse_select(table=True)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table_parts()
        )

    def _parse_location_path(self) -> exp.Var:
        """Consume raw tokens forming a stage path (e.g. @db.schema.stage/dir)."""
        parts = [self._advance_any(ignore_reserved=True)]

        # We avoid consuming a comma token because external tables like @foo and @bar
        # can be joined in a query with a comma separator, as well as closing paren
        # in case of subqueries
        while self._is_connected() and not self._match_set(
            (TokenType.COMMA, TokenType.R_PAREN), advance=False
        ):
            parts.append(self._advance_any(ignore_reserved=True))

        return exp.var("".join(part.text for part in parts if part))

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a lambda argument, wrapping it in a Cast if a type follows."""
        this = super()._parse_lambda_arg()

        if not this:
            return this

        typ = self._parse_types()

        if typ:
            return self.expression(exp.Cast, this=this, to=typ)

        return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Tokenizer(tokens.Tokenizer):
    """Tokenizer for Snowflake SQL: dollar-quoted raw strings, // comments,
    $-parameters and Snowflake-specific keywords."""

    STRING_ESCAPES = ["\\", "'"]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    RAW_STRINGS = ["$$"]
    COMMENTS = ["--", "//", ("/*", "*/")]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BYTEINT": TokenType.INT,
        "CHAR VARYING": TokenType.VARCHAR,
        "CHARACTER VARYING": TokenType.VARCHAR,
        "EXCLUDE": TokenType.EXCEPT,
        "ILIKE ANY": TokenType.ILIKE_ANY,
        "LIKE ANY": TokenType.LIKE_ANY,
        "MATCH_CONDITION": TokenType.MATCH_CONDITION,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "MINUS": TokenType.EXCEPT,
        "NCHAR VARYING": TokenType.VARCHAR,
        "PUT": TokenType.COMMAND,
        "REMOVE": TokenType.COMMAND,
        "RM": TokenType.COMMAND,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "SQL_DOUBLE": TokenType.DOUBLE,
        "SQL_VARCHAR": TokenType.VARCHAR,
        "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
        "TAG": TokenType.TAG,
        "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
        "TOP": TokenType.TOP,
    }

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    VAR_SINGLE_TOKENS = {"$"}

    # SHOW is parsed as a real statement in this dialect, not a raw command.
    COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
class Generator(generator.Generator):
    """SQL generator for the Snowflake dialect."""

    PARAMETER_TOKEN = "$"
    MATCHED_BY_SOURCE = False
    SINGLE_STRING_INTERVAL = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    AGGREGATE_FILTER_SUPPORTED = False
    SUPPORTS_TABLE_COPY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    JSON_KEY_VALUE_PAIR_SEP = ","
    INSERT_OVERWRITE = " OVERWRITE INTO"
    STRUCT_DELIMITER = ("(", ")")
    COPY_PARAMS_ARE_WRAPPED = False
    COPY_PARAMS_EQ_REQUIRED = True
    STAR_EXCEPT = "EXCLUDE"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: inline_array_sql,
        exp.ArrayConcat: rename_func("ARRAY_CAT"),
        exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
        exp.AtTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), e.this
        ),
        exp.BitwiseXor: rename_func("BITXOR"),
        exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
        exp.DateAdd: date_delta_sql("DATEADD"),
        exp.DateDiff: date_delta_sql("DATEDIFF"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.Explode: rename_func("FLATTEN"),
        exp.Extract: rename_func("DATE_PART"),
        exp.FromTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
        ),
        exp.GenerateSeries: lambda self, e: self.func(
            "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
        ),
        exp.GroupConcat: rename_func("LISTAGG"),
        exp.If: if_sql(name="IFF", false_value="NULL"),
        exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
        exp.JSONExtractScalar: lambda self, e: self.func(
            "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
        ),
        exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
        exp.JSONPathRoot: lambda *_: "",
        exp.LogicalAnd: rename_func("BOOLAND_AGG"),
        exp.LogicalOr: rename_func("BOOLOR_AGG"),
        exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.PercentileCont: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.PercentileDisc: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
        exp.RegexpILike: _regexpilike_sql,
        exp.Rand: rename_func("RANDOM"),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
        exp.StartsWith: rename_func("STARTSWITH"),
        exp.StrPosition: lambda self, e: self.func(
            "POSITION", e.args.get("substr"), e.this, e.args.get("position")
        ),
        exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
        exp.Stuff: rename_func("INSERT"),
        exp.TimeAdd: date_delta_sql("TIMEADD"),
        exp.TimestampDiff: lambda self, e: self.func(
            "TIMESTAMPDIFF", e.unit, e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeToStr: lambda self, e: self.func(
            "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
        ),
        exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
        exp.ToArray: rename_func("TO_ARRAY"),
        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
        exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
        exp.TsOrDsToDate: lambda self, e: self.func(
            "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
        ),
        exp.UnixToTime: rename_func("TO_TIMESTAMP"),
        exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: rename_func("BOOLXOR"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.NESTED: "OBJECT",
        exp.DataType.Type.STRUCT: "OBJECT",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # Expression types that cannot appear inside a VALUES-as-table construct.
    UNSUPPORTED_VALUES_EXPRESSIONS = {
        exp.Map,
        exp.StarMap,
        exp.Struct,
        exp.VarMap,
    }

    def with_properties(self, properties: exp.Properties) -> str:
        """Render properties unwrapped, space-separated, after a separator."""
        return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

    def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
        """Render VALUES; fall back to non-table form for unsupported expressions."""
        if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
            values_as_table = False

        return super().values_sql(expression, values_as_table=values_as_table)

    def datatype_sql(self, expression: exp.DataType) -> str:
        """Render a data type; typed structs collapse to a bare OBJECT."""
        expressions = expression.expressions
        if (
            expressions
            and expression.is_type(*exp.DataType.STRUCT_TYPES)
            and any(isinstance(field_type, exp.DataType) for field_type in expressions)
        ):
            # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
            return "OBJECT"

        return super().datatype_sql(expression)

    def tonumber_sql(self, expression: exp.ToNumber) -> str:
        """Render TO_NUMBER(value[, format[, precision[, scale]]])."""
        return self.func(
            "TO_NUMBER",
            expression.this,
            expression.args.get("format"),
            expression.args.get("precision"),
            expression.args.get("scale"),
        )

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        """Render TIMESTAMP_FROM_PARTS, converting a milli argument to nanos."""
        milli = expression.args.get("milli")
        if milli is not None:
            # Snowflake takes nanoseconds, not milliseconds.
            milli_to_nano = milli.pop() * exp.Literal.number(1000000)
            expression.set("nano", milli_to_nano)

        return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        """Render TRY_CAST; non-string inputs degrade to a plain CAST."""
        value = expression.this

        if value.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            value = annotate_types(value)

        if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
            return super().trycast_sql(expression)

        # TRY_CAST only works for string values in Snowflake
        return self.cast_sql(expression)

    def log_sql(self, expression: exp.Log) -> str:
        """Render LOG; Snowflake's one-argument LOG is the natural log, i.e. LN."""
        if not expression.expression:
            return self.func("LN", expression.this)

        return super().log_sql(expression)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        """Render UNNEST as TABLE(FLATTEN(INPUT => ...)) with FLATTEN's fixed column set."""
        table_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        # FLATTEN always exposes these six columns; the offset alias (if any) maps
        # to "index" and the first user-provided column alias maps to "value".
        flatten_columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            seq_get(table_alias.columns if table_alias else [], 0)
            or exp.to_identifier("value"),
            exp.to_identifier("this"),
        ]

        if table_alias:
            table_alias.set("columns", flatten_columns)
        else:
            table_alias = exp.TableAlias(this="_u", columns=flatten_columns)

        explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
        alias_sql = self.sql(table_alias)
        alias_sql = f" AS {alias_sql}" if alias_sql else ""
        return f"{explode}{alias_sql}"

    def show_sql(self, expression: exp.Show) -> str:
        """Render a Snowflake SHOW command with its optional modifiers."""
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""

        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from")
        if from_:
            from_ = f" FROM {from_}"

        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        """Render REGEXP_SUBSTR, filling in positional defaults.

        REGEXP_SUBSTR's optional arguments are positional, so if a later argument
        (e.g. `group`) is present, every earlier one must be generated too.
        """
        group = expression.args.get("group")
        parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
        occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
        position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

        return self.func(
            "REGEXP_SUBSTR",
            expression.this,
            expression.expression,
            position,
            occurrence,
            parameters,
            group,
        )

    def except_op(self, expression: exp.Except) -> str:
        """Render EXCEPT; Snowflake only supports the DISTINCT form."""
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT with All is not supported in Snowflake")
        return super().except_op(expression)

    def intersect_op(self, expression: exp.Intersect) -> str:
        """Render INTERSECT; Snowflake only supports the DISTINCT form."""
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT with All is not supported in Snowflake")
        return super().intersect_op(expression)

    def describe_sql(self, expression: exp.Describe) -> str:
        """Render DESCRIBE, defaulting the object kind to TABLE when unknown."""
        # The `or "TABLE"` fallback guarantees a non-empty kind.
        kind = f" {expression.args.get('kind') or 'TABLE'}"
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"

    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        """Render an identity column as AUTOINCREMENT [START n] [INCREMENT n]."""
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""
        return f"AUTOINCREMENT{start}{increment}"

    def swaptable_sql(self, expression: exp.SwapTable) -> str:
        """Render the SWAP WITH clause of ALTER TABLE."""
        return f"SWAP WITH {self.sql(expression, 'this')}"

    def cluster_sql(self, expression: exp.Cluster) -> str:
        """Render CLUSTER BY with its key list parenthesized."""
        return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

    def struct_sql(self, expression: exp.Struct) -> str:
        """Render a struct literal as OBJECT_CONSTRUCT(key1, value1, ...)."""
        keys = []
        values = []

        for index, field in enumerate(expression.expressions):
            if isinstance(field, exp.PropertyEQ):
                # Named field: identifiers become string keys, other keys pass through.
                keys.append(
                    exp.Literal.string(field.name)
                    if isinstance(field.this, exp.Identifier)
                    else field.this
                )
                values.append(field.expression)
            else:
                # Unnamed field: synthesize a positional key like "_0", "_1", ...
                keys.append(exp.Literal.string(f"_{index}"))
                values.append(field)

        return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

    def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
        """Render APPROX_PERCENTILE; weight/accuracy arguments are dropped with a warning."""
        if expression.args.get("weight") or expression.args.get("accuracy"):
            self.unsupported(
                "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake"
            )

        return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

    def alterset_sql(self, expression: exp.AlterSet) -> str:
        """Render the SET clause of ALTER, including stage file-format/copy options and tags."""
        exprs = self.expressions(expression, flat=True)
        exprs = f" {exprs}" if exprs else ""
        file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
        file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
        copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
        copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
        tag = self.expressions(expression, key="tag", flat=True)
        tag = f" TAG {tag}" if tag else ""

        return f"SET{exprs}{file_format}{copy_options}{tag}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def datatype_sql(self, expression: exp.DataType) -> str:
    """Render a data type, collapsing typed STRUCT-like definitions to OBJECT.

    When a struct-family type carries field entries that are bare DataType
    nodes, the parameterized form cannot be emitted, so a plain OBJECT is
    returned instead.
    """
    fields = expression.expressions
    has_bare_types = any(isinstance(field, exp.DataType) for field in fields)

    if fields and has_bare_types and expression.is_type(*exp.DataType.STRUCT_TYPES):
        # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
        return "OBJECT"

    return super().datatype_sql(expression)
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    """Generate TIMESTAMP_FROM_PARTS, folding a `milli` argument into `nano`.

    Snowflake's TIMESTAMP_FROM_PARTS has no milliseconds slot, so the
    milliseconds node is detached and re-attached scaled to nanoseconds.
    """
    milliseconds = expression.args.get("milli")
    if milliseconds is not None:
        # 1 millisecond == 1,000,000 nanoseconds
        expression.set("nano", milliseconds.pop() * exp.Literal.number(1000000))

    return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
def trycast_sql(self, expression: exp.TryCast) -> str:
    """Render TRY_CAST, downgrading to CAST for non-string operands.

    Snowflake's TRY_CAST only accepts string inputs, so operands whose
    inferred type is neither text nor unknown are emitted as a plain CAST.
    """
    operand = expression.this

    if operand.type is None:
        # Deferred import: annotate_types lives in the optimizer package and
        # importing it at module load would create a cycle.
        from sqlglot.optimizer.annotate_types import annotate_types

        operand = annotate_types(operand)

    if operand.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
        # Text (or unresolvable) operand: TRY_CAST is valid as written.
        return super().trycast_sql(expression)

    # TRY_CAST only works for string values in Snowflake
    return self.cast_sql(expression)
def unnest_sql(self, expression: exp.Unnest) -> str:
    """Transpile UNNEST into Snowflake's TABLE(FLATTEN(INPUT => ...)) form."""
    unnest_alias = expression.args.get("alias")
    offset = expression.args.get("offset")

    # FLATTEN exposes six output columns; map the caller's first alias column
    # (if any) onto VALUE and any offset expression onto the INDEX slot.
    value_column = seq_get(unnest_alias.columns if unnest_alias else [], 0)
    columns = [
        exp.to_identifier("seq"),
        exp.to_identifier("key"),
        exp.to_identifier("path"),
        offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
        value_column or exp.to_identifier("value"),
        exp.to_identifier("this"),
    ]

    if unnest_alias:
        unnest_alias.set("columns", columns)
    else:
        unnest_alias = exp.TableAlias(this="_u", columns=columns)

    table_sql = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
    alias_sql = self.sql(unnest_alias)
    alias_sql = f" AS {alias_sql}" if alias_sql else ""
    return f"{table_sql}{alias_sql}"
def show_sql(self, expression: exp.Show) -> str:
    """Render a Snowflake SHOW command with its optional modifier clauses."""

    def _clause(key: str, keyword: str = "") -> str:
        # " KEYWORD <sql>" (or just " <sql>" with no keyword) when the
        # argument is present, otherwise the empty string.
        sql = self.sql(expression, key)
        if not sql:
            return ""
        return f" {keyword} {sql}" if keyword else f" {sql}"

    terse = "TERSE " if expression.args.get("terse") else ""
    history = " HISTORY" if expression.args.get("history") else ""
    like = _clause("like", "LIKE")
    scope = _clause("scope")
    scope_kind = _clause("scope_kind", "IN")
    starts_with = _clause("starts_with", "STARTS WITH")
    limit = self.sql(expression, "limit")
    from_ = _clause("from", "FROM")

    return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
    """Generate REGEXP_SUBSTR, back-filling optional positional arguments.

    REGEXP_SUBSTR's arguments are positional, so whenever a later argument
    (e.g. `group`) is present, every earlier optional argument must also be
    emitted with its documented default for the call to stay well-formed.
    """
    group = expression.args.get("group")
    # Each default is materialized only when a later argument exists.
    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    ordered_args = (
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )
    return self.func("REGEXP_SUBSTR", *ordered_args)
def describe_sql(self, expression: exp.Describe) -> str:
    """Render DESCRIBE, defaulting the object kind to TABLE when unknown."""
    # `or "TABLE"` guarantees a non-empty kind, which made the original
    # `if kind_value else ""` guard unreachable — it has been removed.
    kind = f" {expression.args.get('kind') or 'TABLE'}"
    this = f" {self.sql(expression, 'this')}"
    expressions = self.expressions(expression, flat=True)
    expressions = f" {expressions}" if expressions else ""
    return f"DESCRIBE{kind}{this}{expressions}"
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    """Render an identity column constraint as Snowflake's AUTOINCREMENT."""
    parts = ["AUTOINCREMENT"]

    start = expression.args.get("start")
    if start:
        parts.append(f"START {start}")

    increment = expression.args.get("increment")
    if increment:
        parts.append(f"INCREMENT {increment}")

    return " ".join(parts)
def struct_sql(self, expression: exp.Struct) -> str:
    """Transpile a struct literal into OBJECT_CONSTRUCT(key, value, ...).

    Named fields (PropertyEQ) keep their names as string keys; positional
    fields receive synthetic `_<index>` keys.
    """
    interleaved = []

    for index, field in enumerate(expression.expressions):
        if isinstance(field, exp.PropertyEQ):
            if isinstance(field.this, exp.Identifier):
                key = exp.Literal.string(field.name)
            else:
                key = field.this
            interleaved.extend((key, field.expression))
        else:
            interleaved.extend((exp.Literal.string(f"_{index}"), field))

    return self.func("OBJECT_CONSTRUCT", *interleaved)
def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
    """Generate APPROX_PERCENTILE, warning about unsupported arguments."""
    if any(expression.args.get(arg) for arg in ("weight", "accuracy")):
        # These parameters have no Snowflake equivalent; they are dropped
        # with an unsupported-feature warning.
        self.unsupported(
            "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake"
        )

    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
def alterset_sql(self, expression: exp.AlterSet) -> str:
    """Render the SET clause of an ALTER statement, including stage options."""

    def _wrap(sql: str, template: str) -> str:
        # Apply the clause template only when the rendered SQL is non-empty.
        return template.format(sql) if sql else ""

    exprs = _wrap(self.expressions(expression, flat=True), " {}")
    file_format = _wrap(
        self.expressions(expression, key="file_format", flat=True, sep=" "),
        " STAGE_FILE_FORMAT = ({})",
    )
    copy_options = _wrap(
        self.expressions(expression, key="copy_options", flat=True, sep=" "),
        " STAGE_COPY_OPTIONS = ({})",
    )
    tag = _wrap(self.expressions(expression, key="tag", flat=True), " TAG {}")

    return f"SET{exprs}{file_format}{copy_options}{tag}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- OUTER_UNION_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql