sqlglot.dialects.snowflake
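
A minimal usage sketch (illustrative; exact output strings can vary across sqlglot versions):

import sqlglot

# Parse Snowflake SQL into sqlglot's canonical AST
ast = sqlglot.parse_one("SELECT IFF(x > 0, 'pos', 'neg') FROM t", read="snowflake")

# Transpile a Snowflake-specific function call into another dialect
print(sqlglot.transpile("SELECT TO_TIMESTAMP('2024-01-01', 'YYYY-MM-DD')", read="snowflake", write="duckdb")[0])
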
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_safe_divide_sql,
    no_timestamp_sql,
    timestampdiff_sql,
    no_make_interval_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind))
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder
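
# For illustration, the builder above maps, roughly:
#   TO_TIME('01:02:03')         -> a plain cast, CAST('01:02:03' AS TIME)
#   TRY_TO_DATE('2020-01-01')   -> exp.TryCast(..., to=DATE)
#   TO_TIMESTAMP(1700000000, 3) -> exp.UnixToTime(this=1700000000, scale=3)
#   TO_TIMESTAMP(str, fmt)      -> exp.StrToTime, with fmt mapped via TIME_MAPPING
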
def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))
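
# Sketch of the rewrites performed by the three builders above:
#   DIV0(a, b)     -> IF(b = 0 AND NOT a IS NULL, 0, a / b)
#   ZEROIFNULL(x)  -> IF(x IS NULL, 0, x)
#   NULLIFZERO(x)  -> IF(x = 0, NULL, x)
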
def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    ).as_(sequence_value_name)

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])
    unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))


def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into
            # ARRAY_LENGTH), the transformed Snowflake query is the following (it'll
            # be unnested properly on the next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) ->
            # SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression
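
# Rough shape of the rewrite above, since Snowflake has no GENERATE_DATE_ARRAY:
#   SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' DAY))
# becomes roughly an ARRAY_GENERATE_RANGE(0, DATEDIFF(DAY, start, end) + 1)
# sequence whose values are projected through DATEADD(DAY, value, start).
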
def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)
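
# Because these optional arguments are positional, defaults are backfilled from
# right to left: e.g. a REGEXP_SUBSTR with only a group set still has to render
# position, occurrence and parameters, producing something like
#   REGEXP_SUBSTR(subject, pattern, 1, 1, 'c', group)
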
class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)
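
    # For illustration, TIME_MAPPING translates Snowflake format tokens into the
    # strftime-style notation sqlglot uses internally, so the dialect's default
    # TIME_FORMAT 'YYYY-MM-DD HH24:MI:SS' corresponds to '%Y-%m-%d %H:%M:%S'.
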
_build_datetime("TO_DATE", exp.DataType.Type.DATE), 438 "TO_NUMBER": lambda args: exp.ToNumber( 439 this=seq_get(args, 0), 440 format=seq_get(args, 1), 441 precision=seq_get(args, 2), 442 scale=seq_get(args, 3), 443 ), 444 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 445 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 446 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 447 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 448 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 449 "TO_VARCHAR": exp.ToChar.from_arg_list, 450 "ZEROIFNULL": _build_if_from_zeroifnull, 451 } 452 453 FUNCTION_PARSERS = { 454 **parser.Parser.FUNCTION_PARSERS, 455 "DATE_PART": lambda self: self._parse_date_part(), 456 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 457 } 458 FUNCTION_PARSERS.pop("TRIM") 459 460 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 461 462 RANGE_PARSERS = { 463 **parser.Parser.RANGE_PARSERS, 464 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 465 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 466 } 467 468 ALTER_PARSERS = { 469 **parser.Parser.ALTER_PARSERS, 470 "UNSET": lambda self: self.expression( 471 exp.Set, 472 tag=self._match_text_seq("TAG"), 473 expressions=self._parse_csv(self._parse_id_var), 474 unset=True, 475 ), 476 } 477 478 STATEMENT_PARSERS = { 479 **parser.Parser.STATEMENT_PARSERS, 480 TokenType.SHOW: lambda self: self._parse_show(), 481 } 482 483 PROPERTY_PARSERS = { 484 **parser.Parser.PROPERTY_PARSERS, 485 "LOCATION": lambda self: self._parse_location_property(), 486 "TAG": lambda self: self._parse_tag(), 487 } 488 489 TYPE_CONVERTERS = { 490 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 491 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 492 } 493 494 SHOW_PARSERS = { 495 "SCHEMAS": _show_parser("SCHEMAS"), 496 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 497 "OBJECTS": _show_parser("OBJECTS"), 498 "TERSE OBJECTS": _show_parser("OBJECTS"), 499 "TABLES": _show_parser("TABLES"), 500 "TERSE TABLES": _show_parser("TABLES"), 501 "VIEWS": _show_parser("VIEWS"), 502 "TERSE VIEWS": _show_parser("VIEWS"), 503 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 504 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 505 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 506 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 507 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 508 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 509 "SEQUENCES": _show_parser("SEQUENCES"), 510 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 511 "COLUMNS": _show_parser("COLUMNS"), 512 "USERS": _show_parser("USERS"), 513 "TERSE USERS": _show_parser("USERS"), 514 } 515 516 CONSTRAINT_PARSERS = { 517 **parser.Parser.CONSTRAINT_PARSERS, 518 "WITH": lambda self: self._parse_with_constraint(), 519 "MASKING": lambda self: self._parse_with_constraint(), 520 "PROJECTION": lambda self: self._parse_with_constraint(), 521 "TAG": lambda self: self._parse_with_constraint(), 522 } 523 524 STAGED_FILE_SINGLE_TOKENS = { 525 TokenType.DOT, 526 TokenType.MOD, 527 TokenType.SLASH, 528 } 529 530 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 531 532 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 533 534 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 535 536 LAMBDAS = 
        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `value <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`,
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)
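
        # Illustration: with the override above, `x NOT IN (SELECT y FROM t)` parses
        # as `x <> ALL (SELECT y FROM t)` rather than `NOT x IN (SELECT y FROM t)`,
        # matching Snowflake's own interpretation documented above.
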
        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)
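
        # Illustration: DATE_PART('epoch_millisecond', col) parses as a cast of col
        # to TIMESTAMP fed into exp.TimeToUnix, multiplied by 1000; non-epoch parts
        # fall through to a plain exp.Extract.
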
        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # This will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS,
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )
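
        # Illustration: `SHOW TERSE TABLES LIKE '%foo%' IN SCHEMA db.s` populates the
        # exp.Show node with terse=True, this="TABLES", the LIKE string, and
        # scope_kind="SCHEMA" with scope set to the parsed table parts for db.s.
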
        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator; the same goes for a
            # closing paren in the case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                ]
            ),
            exp.SafeDivide: lambda self, e: no_safe_divide_sql(self, e, "IFF"),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
        }
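
        # For illustration, TRANSFORMS is the counterpart of Parser.FUNCTIONS: it
        # renders canonical nodes back into Snowflake syntax, e.g. exp.If becomes
        # IFF(cond, true, NULL), and a BigQuery-style SELECT * EXCEPT (...) is
        # emitted as SELECT * EXCLUDE (...) via STAR_EXCEPT above.
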
        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"
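
        # Illustration: UNNEST(arr) renders as TABLE(FLATTEN(INPUT => arr)) AS
        # _u(seq, key, path, index, value, this), mirroring the column layout that
        # Snowflake's FLATTEN table function exposes.
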
        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        @unsupported_args("weight", "accuracy")
        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"

        def strtotime_sql(self, expression: exp.StrToTime):
            safe_prefix = "TRY_" if expression.args.get("safe") else ""
            return self.func(
                f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
            )

        def timestampsub_sql(self, expression: exp.TimestampSub):
            return self.sql(
                exp.TimestampAdd(
                    this=expression.this,
                    expression=expression.expression * -1,
                    unit=expression.unit,
                )
            )

        def jsonextract_sql(self, expression: exp.JSONExtract):
            this = expression.this

            # JSON strings are valid coming from other dialects such as BQ
            return self.func(
                "GET_PATH",
                exp.ParseJSON(this=this) if this.is_string else this,
                expression.expression,
            )

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if not isinstance(this, exp.TsOrDsToTimestamp):
                this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

            return self.func("TO_CHAR", this, self.format_time(expression))
e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 953 ), 954 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 955 exp.Uuid: rename_func("UUID_STRING"), 956 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 957 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 958 exp.Xor: rename_func("BOOLXOR"), 959 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 960 rename_func("EDITDISTANCE") 961 ), 962 } 963 964 SUPPORTED_JSON_PATH_PARTS = { 965 exp.JSONPathKey, 966 exp.JSONPathRoot, 967 exp.JSONPathSubscript, 968 } 969 970 TYPE_MAPPING = { 971 **generator.Generator.TYPE_MAPPING, 972 exp.DataType.Type.NESTED: "OBJECT", 973 exp.DataType.Type.STRUCT: "OBJECT", 974 } 975 976 PROPERTIES_LOCATION = { 977 **generator.Generator.PROPERTIES_LOCATION, 978 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 979 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 980 } 981 982 UNSUPPORTED_VALUES_EXPRESSIONS = { 983 exp.Map, 984 exp.StarMap, 985 exp.Struct, 986 exp.VarMap, 987 } 988 989 def with_properties(self, properties: exp.Properties) -> str: 990 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 991 992 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 993 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 994 values_as_table = False 995 996 return super().values_sql(expression, values_as_table=values_as_table) 997 998 def datatype_sql(self, expression: exp.DataType) -> str: 999 expressions = expression.expressions 1000 if ( 1001 expressions 1002 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1003 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1004 ): 1005 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1006 return "OBJECT" 1007 1008 return super().datatype_sql(expression) 1009 1010 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1011 return self.func( 1012 "TO_NUMBER", 1013 expression.this, 1014 expression.args.get("format"), 1015 expression.args.get("precision"), 1016 expression.args.get("scale"), 1017 ) 1018 1019 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1020 milli = expression.args.get("milli") 1021 if milli is not None: 1022 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1023 expression.set("nano", milli_to_nano) 1024 1025 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1026 1027 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1028 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1029 return self.func("TO_GEOGRAPHY", expression.this) 1030 if expression.is_type(exp.DataType.Type.GEOMETRY): 1031 return self.func("TO_GEOMETRY", expression.this) 1032 1033 return super().cast_sql(expression, safe_prefix=safe_prefix) 1034 1035 def trycast_sql(self, expression: exp.TryCast) -> str: 1036 value = expression.this 1037 1038 if value.type is None: 1039 from sqlglot.optimizer.annotate_types import annotate_types 1040 1041 value = annotate_types(value) 1042 1043 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1044 return super().trycast_sql(expression) 1045 1046 # TRY_CAST only works for string values in Snowflake 1047 return self.cast_sql(expression) 1048 1049 def log_sql(self, expression: exp.Log) -> str: 1050 if not expression.expression: 1051 return self.func("LN", expression.this) 1052 1053 return super().log_sql(expression) 1054 1055 def unnest_sql(self, expression: exp.Unnest) -> str: 1056 unnest_alias = 
expression.args.get("alias") 1057 offset = expression.args.get("offset") 1058 1059 columns = [ 1060 exp.to_identifier("seq"), 1061 exp.to_identifier("key"), 1062 exp.to_identifier("path"), 1063 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1064 seq_get(unnest_alias.columns if unnest_alias else [], 0) 1065 or exp.to_identifier("value"), 1066 exp.to_identifier("this"), 1067 ] 1068 1069 if unnest_alias: 1070 unnest_alias.set("columns", columns) 1071 else: 1072 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1073 1074 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 1075 alias = self.sql(unnest_alias) 1076 alias = f" AS {alias}" if alias else "" 1077 return f"{explode}{alias}" 1078 1079 def show_sql(self, expression: exp.Show) -> str: 1080 terse = "TERSE " if expression.args.get("terse") else "" 1081 history = " HISTORY" if expression.args.get("history") else "" 1082 like = self.sql(expression, "like") 1083 like = f" LIKE {like}" if like else "" 1084 1085 scope = self.sql(expression, "scope") 1086 scope = f" {scope}" if scope else "" 1087 1088 scope_kind = self.sql(expression, "scope_kind") 1089 if scope_kind: 1090 scope_kind = f" IN {scope_kind}" 1091 1092 starts_with = self.sql(expression, "starts_with") 1093 if starts_with: 1094 starts_with = f" STARTS WITH {starts_with}" 1095 1096 limit = self.sql(expression, "limit") 1097 1098 from_ = self.sql(expression, "from") 1099 if from_: 1100 from_ = f" FROM {from_}" 1101 1102 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 1103 1104 def describe_sql(self, expression: exp.Describe) -> str: 1105 # Default to table if kind is unknown 1106 kind_value = expression.args.get("kind") or "TABLE" 1107 kind = f" {kind_value}" if kind_value else "" 1108 this = f" {self.sql(expression, 'this')}" 1109 expressions = self.expressions(expression, flat=True) 1110 expressions = f" {expressions}" if expressions else "" 1111 return f"DESCRIBE{kind}{this}{expressions}" 1112 1113 def generatedasidentitycolumnconstraint_sql( 1114 self, expression: exp.GeneratedAsIdentityColumnConstraint 1115 ) -> str: 1116 start = expression.args.get("start") 1117 start = f" START {start}" if start else "" 1118 increment = expression.args.get("increment") 1119 increment = f" INCREMENT {increment}" if increment else "" 1120 return f"AUTOINCREMENT{start}{increment}" 1121 1122 def cluster_sql(self, expression: exp.Cluster) -> str: 1123 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1124 1125 def struct_sql(self, expression: exp.Struct) -> str: 1126 keys = [] 1127 values = [] 1128 1129 for i, e in enumerate(expression.expressions): 1130 if isinstance(e, exp.PropertyEQ): 1131 keys.append( 1132 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1133 ) 1134 values.append(e.expression) 1135 else: 1136 keys.append(exp.Literal.string(f"_{i}")) 1137 values.append(e) 1138 1139 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1140 1141 @unsupported_args("weight", "accuracy") 1142 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1143 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1144 1145 def alterset_sql(self, expression: exp.AlterSet) -> str: 1146 exprs = self.expressions(expression, flat=True) 1147 exprs = f" {exprs}" if exprs else "" 1148 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1149 file_format = f" 
STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1150 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1151 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1152 tag = self.expressions(expression, key="tag", flat=True) 1153 tag = f" TAG {tag}" if tag else "" 1154 1155 return f"SET{exprs}{file_format}{copy_options}{tag}" 1156 1157 def strtotime_sql(self, expression: exp.StrToTime): 1158 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1159 return self.func( 1160 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1161 ) 1162 1163 def timestampsub_sql(self, expression: exp.TimestampSub): 1164 return self.sql( 1165 exp.TimestampAdd( 1166 this=expression.this, 1167 expression=expression.expression * -1, 1168 unit=expression.unit, 1169 ) 1170 ) 1171 1172 def jsonextract_sql(self, expression: exp.JSONExtract): 1173 this = expression.this 1174 1175 # JSON strings are valid coming from other dialects such as BQ 1176 return self.func( 1177 "GET_PATH", 1178 exp.ParseJSON(this=this) if this.is_string else this, 1179 expression.expression, 1180 ) 1181 1182 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1183 this = expression.this 1184 if not isinstance(this, exp.TsOrDsToTimestamp): 1185 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1186 1187 return self.func("TO_CHAR", this, self.format_time(expression))
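The _negate_range override shown earlier in this listing is easiest to see end to end: parsing NOT IN (subquery) under the Snowflake dialect produces a <> ALL comparison instead of a negated IN. A minimal sketch; the exact output string is our assumption, not taken from this page:

import sqlglot

# NOT IN (subquery) is parsed into `<> ALL (subquery)` by _negate_range,
# so the round-tripped SQL uses the ALL form.
sql = "SELECT * FROM t WHERE x NOT IN (SELECT y FROM s)"
print(sqlglot.transpile(sql, read="snowflake", write="snowflake")[0])
# Assumed output: SELECT * FROM t WHERE x <> ALL (SELECT y FROM s)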
NORMALIZATION_STRATEGY: Specifies the strategy according to which identifiers should be normalized.
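For Snowflake this strategy is uppercase: unquoted identifiers are case-insensitive and normalize to upper case, while quoted identifiers are preserved. A small sketch using the public normalize_identifier helper (behavior as we understand it, not quoted from this page):

from sqlglot import exp
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()
# Unquoted identifiers normalize to uppercase under this strategy...
print(dialect.normalize_identifier(exp.to_identifier("col")).name)               # COL
# ...while quoted identifiers keep their original casing.
print(dialect.normalize_identifier(exp.to_identifier("col", quoted=True)).name)  # col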
NULL_ORDERING: Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
PREFER_CTE_ALIAS_COLUMN: Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,
WITH y(c) AS (
SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
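This rewrite is applied during qualification. A hedged sketch of triggering it through the optimizer's qualify entry point:

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
# With PREFER_CTE_ALIAS_COLUMN set, HAVING c resolves to the CTE alias,
# so the inner projection gains an explicit alias (modulo quoting/casing).
print(qualify(parse_one(sql, read="snowflake"), dialect="snowflake").sql("snowflake"))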
TIME_MAPPING: Associates this dialect's time formats with their equivalent Python strftime formats.
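For example, Snowflake's yyyy-mm-dd tokens are translated through this table when transpiling to a strftime-based dialect. A short sketch; the DuckDB output below is our assumption:

import sqlglot

# TO_TIMESTAMP(<string>, <fmt>) is parsed with Snowflake's format tokens and
# re-rendered with strftime specifiers for DuckDB.
print(
    sqlglot.transpile(
        "SELECT TO_TIMESTAMP('2024-01-02', 'yyyy-mm-dd')",
        read="snowflake",
        write="duckdb",
    )[0]
)
# Assumed output: SELECT STRPTIME('2024-01-02', '%Y-%m-%d')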
357 def quote_identifier(self, expression: E, identify: bool = True) -> E: 358 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 359 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 360 if ( 361 isinstance(expression, exp.Identifier) 362 and isinstance(expression.parent, exp.Table) 363 and expression.name.lower() == "dual" 364 ): 365 return expression # type: ignore 366 367 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
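A short usage sketch of the DUAL carve-out described above (identifier names are illustrative only):

from sqlglot import exp
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()
# An ordinary identifier is quoted as requested...
print(dialect.quote_identifier(exp.to_identifier("col")).sql("snowflake"))  # "col"
# ...but an unquoted DUAL inside a Table node is returned untouched.
table = exp.to_table("dual")
print(dialect.quote_identifier(table.this).sql("snowflake"))  # dual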
UNESCAPED_SEQUENCES: Mapping of an escaped sequence (\\n) to its unescaped version (\n).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- PRESERVE_ORIGINAL_NAMES
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
369 class Parser(parser.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
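As a quick sketch of this parser in action: Snowflake-only functions such as IFF are parsed into dialect-agnostic nodes (the printed shapes are assumptions).

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT IFF(x > 0, 'pos', 'neg') FROM t", read="snowflake")
# IFF is parsed into a generic exp.If node...
print(ast.find(exp.If) is not None)  # True
# ...and round-trips back to IFF through the Snowflake generator.
print(ast.sql("snowflake"))  # SELECT IFF(x > 0, 'pos', 'neg') FROM t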
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
794 class Tokenizer(tokens.Tokenizer):
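Two of this tokenizer's Snowflake-specific settings (shown in the full source above) are easy to exercise: // starts a line comment and $$ ... $$ delimits a raw string. A hedged sketch using Dialect.tokenize, listed under Inherited Members below:

from sqlglot.dialects.snowflake import Snowflake

# The $$-delimited literal survives as a single string token; the // comment
# is consumed by the tokenizer rather than emitted as a token of its own.
for token in Snowflake().tokenize("SELECT $$a 'raw' string$$ // trailing comment"):
    print(token.token_type, token.text)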
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
837 class Generator(generator.Generator):
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
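A minimal sketch of driving this generator (option names follow the argument list above):

from sqlglot import parse_one

ast = parse_one("select a, b from my_table where a > 1", read="snowflake")
# pretty=True formats the output; the Snowflake generator is selected via dialect.
print(ast.sql(dialect="snowflake", pretty=True))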
998 def datatype_sql(self, expression: exp.DataType) -> str: 999 expressions = expression.expressions 1000 if ( 1001 expressions 1002 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1003 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1004 ): 1005 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1006 return "OBJECT" 1007 1008 return super().datatype_sql(expression)
1019 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1020 milli = expression.args.get("milli") 1021 if milli is not None: 1022 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1023 expression.set("nano", milli_to_nano) 1024 1025 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
1027 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1028 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1029 return self.func("TO_GEOGRAPHY", expression.this) 1030 if expression.is_type(exp.DataType.Type.GEOMETRY): 1031 return self.func("TO_GEOMETRY", expression.this) 1032 1033 return super().cast_sql(expression, safe_prefix=safe_prefix)
1035 def trycast_sql(self, expression: exp.TryCast) -> str: 1036 value = expression.this 1037 1038 if value.type is None: 1039 from sqlglot.optimizer.annotate_types import annotate_types 1040 1041 value = annotate_types(value) 1042 1043 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1044 return super().trycast_sql(expression) 1045 1046 # TRY_CAST only works for string values in Snowflake 1047 return self.cast_sql(expression)
1055 def unnest_sql(self, expression: exp.Unnest) -> str: 1056 unnest_alias = expression.args.get("alias") 1057 offset = expression.args.get("offset") 1058 1059 columns = [ 1060 exp.to_identifier("seq"), 1061 exp.to_identifier("key"), 1062 exp.to_identifier("path"), 1063 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1064 seq_get(unnest_alias.columns if unnest_alias else [], 0) 1065 or exp.to_identifier("value"), 1066 exp.to_identifier("this"), 1067 ] 1068 1069 if unnest_alias: 1070 unnest_alias.set("columns", columns) 1071 else: 1072 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1073 1074 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 1075 alias = self.sql(unnest_alias) 1076 alias = f" AS {alias}" if alias else "" 1077 return f"{explode}{alias}"
1079 def show_sql(self, expression: exp.Show) -> str: 1080 terse = "TERSE " if expression.args.get("terse") else "" 1081 history = " HISTORY" if expression.args.get("history") else "" 1082 like = self.sql(expression, "like") 1083 like = f" LIKE {like}" if like else "" 1084 1085 scope = self.sql(expression, "scope") 1086 scope = f" {scope}" if scope else "" 1087 1088 scope_kind = self.sql(expression, "scope_kind") 1089 if scope_kind: 1090 scope_kind = f" IN {scope_kind}" 1091 1092 starts_with = self.sql(expression, "starts_with") 1093 if starts_with: 1094 starts_with = f" STARTS WITH {starts_with}" 1095 1096 limit = self.sql(expression, "limit") 1097 1098 from_ = self.sql(expression, "from") 1099 if from_: 1100 from_ = f" FROM {from_}" 1101 1102 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
1104 def describe_sql(self, expression: exp.Describe) -> str: 1105 # Default to table if kind is unknown 1106 kind_value = expression.args.get("kind") or "TABLE" 1107 kind = f" {kind_value}" if kind_value else "" 1108 this = f" {self.sql(expression, 'this')}" 1109 expressions = self.expressions(expression, flat=True) 1110 expressions = f" {expressions}" if expressions else "" 1111 return f"DESCRIBE{kind}{this}{expressions}"
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    start = expression.args.get("start")
    start = f" START {start}" if start else ""
    increment = expression.args.get("increment")
    increment = f" INCREMENT {increment}" if increment else ""
    return f"AUTOINCREMENT{start}{increment}"
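Standard GENERATED ... AS IDENTITY constraints map to Snowflake's AUTOINCREMENT keyword, carrying over START and INCREMENT when present. A hedged sketch, assuming a PostgreSQL-style source (names and numbers are illustrative):

import sqlglot

sql = "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2))"
print(sqlglot.transpile(sql, read="postgres", write="snowflake")[0])
# Expected (approximately): CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 2)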
def struct_sql(self, expression: exp.Struct) -> str:
    keys = []
    values = []

    for i, e in enumerate(expression.expressions):
        if isinstance(e, exp.PropertyEQ):
            keys.append(
                exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
            )
            values.append(e.expression)
        else:
            keys.append(exp.Literal.string(f"_{i}"))
            values.append(e)

    return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
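Struct literals are rendered as OBJECT_CONSTRUCT with alternating key/value arguments; identifier keys become string literals, and positional (unnamed) fields receive synthetic _0, _1, ... keys. A minimal sketch, assuming a DuckDB struct literal as the source:

import sqlglot

print(sqlglot.transpile("SELECT {'a': 1, 'b': 2}", read="duckdb", write="snowflake")[0])
# Expected: SELECT OBJECT_CONSTRUCT('a', 1, 'b', 2)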
def alterset_sql(self, expression: exp.AlterSet) -> str:
    exprs = self.expressions(expression, flat=True)
    exprs = f" {exprs}" if exprs else ""
    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
    tag = self.expressions(expression, key="tag", flat=True)
    tag = f" TAG {tag}" if tag else ""

    return f"SET{exprs}{file_format}{copy_options}{tag}"
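ALTER ... SET output is assembled from four slots: generic property assignments, STAGE_FILE_FORMAT, STAGE_COPY_OPTIONS, and TAG. A hedged round-trip sketch (tag name and value are illustrative, and exact spacing may vary):

import sqlglot

sql = "ALTER TABLE t SET TAG department='finance'"
print(sqlglot.parse_one(sql, read="snowflake").sql("snowflake"))
# Expected (approximately): ALTER TABLE t SET TAG department = 'finance'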
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql