sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_safe_divide_sql,
    no_timestamp_sql,
    timestampdiff_sql,
    no_make_interval_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind))
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder
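

# Illustrative sketch (not from the source): per the builder above, a numeric
# argument to TO_TIMESTAMP is treated as a Unix epoch, while a lone string
# argument becomes a cast. Assuming only the public `sqlglot.parse_one` API:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT TO_TIMESTAMP(1659981729)", read="snowflake")
#     # the argument parses into an exp.UnixToTime node
#     >>> sqlglot.parse_one("SELECT TO_TIMESTAMP('2013-04-05')", read="snowflake")
#     # the argument parses into CAST('2013-04-05' AS TIMESTAMP)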


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


def _build_bitor(args: t.List) -> exp.BitwiseOr | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="BITOR", expressions=args)

    return binary_from_function(exp.BitwiseOr)(args)


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)
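

# Illustrative sketch (not from the source): because DIV0 parses into exp.If,
# transpiling it to a dialect without DIV0 yields a guarded division, roughly
# `CASE WHEN b = 0 AND NOT a IS NULL THEN 0 ELSE a / b END` (exact rendering
# varies by target). Assuming the public `sqlglot.transpile` API:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="duckdb")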


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression
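

# Illustrative sketch (not from the source): _regexpilike_sql above forces the
# "i" flag, so a case-insensitive regex match from another dialect becomes
# REGEXP_LIKE with 'i' appended, roughly:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT x ~* 'abc'", read="postgres", write="snowflake")
#     # something like: SELECT REGEXP_LIKE(x, 'abc', 'i')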


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    ).as_(sequence_value_name)

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])
    unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))


def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into
            # ARRAY_LENGTH), the transformed Snowflake query is the following (it'll be
            # unnested properly on the next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) ->
            # SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression
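

# Illustrative sketch (not from the source): the two transforms above let a
# GENERATE_DATE_ARRAY (e.g. from BigQuery) reach Snowflake as a projection of
# DATEADD(<unit>, value, <start>) over ARRAY_GENERATE_RANGE(0, DATEDIFF(...) + 1):
#
#     >>> import sqlglot
#     >>> sqlglot.transpile(
#     ...     "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-01-03', INTERVAL 1 DAY))",
#     ...     read="bigquery",
#     ...     write="snowflake",
#     ... )
#     # emits a subquery over TABLE(FLATTEN(...)) of the generated range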


def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)
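

# Illustrative sketch (not from the source): since REGEXP_SUBSTR's arguments are
# positional, requesting only a capture group forces the defaults before it, so
# exp.RegexpExtract with group=2 renders roughly as
# REGEXP_SUBSTR(subject, pattern, 1, 1, 'c', 2) rather than omitting the
# position/occurrence/parameters slots.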


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)
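
    # Illustrative sketch (not from the source): unquoted identifiers normalize to
    # uppercase, and quote_identifier above leaves DUAL bare even when quoting is
    # forced. Assuming the public `sqlglot.transpile` API:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.transpile("SELECT a FROM dual", read="snowflake", write="snowflake", identify=True)
    #     # quotes "a" but leaves DUAL unquoted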

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BITOR": _build_bitor,
            "BIT_OR": _build_bitor,
            "BOOLXOR": binary_from_function(exp.Xor),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }
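
        # Illustrative sketch (not from the source): these entries normalize
        # Snowflake spellings onto canonical nodes, e.g. SQUARE(x) parses as
        # exp.Pow so other dialects can render POWER(x, 2) or an equivalent, and
        # ARRAY_CONTAINS swaps its arguments into sqlglot's (array, value) order:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT SQUARE(x)", read="snowflake", write="postgres")
        #     # something like: SELECT POWER(x, 2)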

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)
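
        # Illustrative sketch (not from the source): EPOCH-style parts become a
        # scaled Unix timestamp, e.g. DATE_PART(EPOCH_MILLISECOND, ts) parses
        # roughly into EXTRACT(epoch_second FROM CAST(ts AS TIMESTAMP)) * 1000:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one("SELECT DATE_PART(EPOCH_MILLISECOND, ts)", read="snowflake")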

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # this will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS,
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as a closing paren
            # in the case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # inline FK: the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # out-of-line FK: explicitly names the columns
            return super()._parse_foreign_key()
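
    # Illustrative sketch (not from the source): _parse_table_parts above accepts
    # staged-file locations with copy-style options (stage name hypothetical):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one(
    #     ...     "SELECT $1 FROM @my_stage (FILE_FORMAT => 'csv', PATTERN => '.*')",
    #     ...     read="snowflake",
    #     ... )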

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
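
    # Illustrative sketch (not from the source): tokenizer-level Snowflake-isms
    # handled above include `//` line comments, $$-delimited raw strings, and
    # MINUS as a synonym for EXCEPT:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.transpile("SELECT 1 MINUS SELECT 2", read="snowflake", write="postgres")
    #     # renders the set operation with EXCEPT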

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                ]
            ),
            exp.SafeDivide: lambda self, e: no_safe_divide_sql(self, e, "IFF"),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }
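
        # Illustrative sketch (not from the source): TRANSFORMS rewrites canonical
        # nodes into Snowflake spellings, e.g. exp.If renders as IFF:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT IF(c, 1, 2)", read="spark", write="snowflake")
        #     # something like: SELECT IFF(c, 1, 2)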

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        @unsupported_args("weight", "accuracy")
        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"

        def strtotime_sql(self, expression: exp.StrToTime):
            safe_prefix = "TRY_" if expression.args.get("safe") else ""
            return self.func(
                f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
            )

        def timestampsub_sql(self, expression: exp.TimestampSub):
            return self.sql(
                exp.TimestampAdd(
                    this=expression.this,
                    expression=expression.expression * -1,
                    unit=expression.unit,
                )
            )

        def jsonextract_sql(self, expression: exp.JSONExtract):
            this = expression.this

            # JSON strings are valid coming from other dialects such as BQ
            return self.func(
                "GET_PATH",
                exp.ParseJSON(this=this) if this.is_string else this,
                expression.expression,
            )

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if not isinstance(this, exp.TsOrDsToTimestamp):
                this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

            return self.func("TO_CHAR", this, self.format_time(expression))
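

# Illustrative sketch (not from the source): TRY_CAST in Snowflake only accepts
# string operands, which is why trycast_sql above annotates types and falls back
# to a plain CAST for non-string values:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT TRY_CAST(x AS INT)", read="snowflake", write="snowflake")
#     # stays TRY_CAST, since x has unknown (possibly string) type
#     >>> sqlglot.transpile("SELECT TRY_CAST(1 AS TEXT)", read="snowflake", write="snowflake")
#     # becomes a plain CAST, since the operand is known not to be a string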
rename_func("TO_ARRAY"), 965 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 966 exp.ToDouble: rename_func("TO_DOUBLE"), 967 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 968 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 969 exp.TsOrDsToDate: lambda self, e: self.func( 970 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 971 ), 972 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 973 exp.Uuid: rename_func("UUID_STRING"), 974 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 975 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 976 exp.Xor: rename_func("BOOLXOR"), 977 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 978 rename_func("EDITDISTANCE") 979 ), 980 } 981 982 SUPPORTED_JSON_PATH_PARTS = { 983 exp.JSONPathKey, 984 exp.JSONPathRoot, 985 exp.JSONPathSubscript, 986 } 987 988 TYPE_MAPPING = { 989 **generator.Generator.TYPE_MAPPING, 990 exp.DataType.Type.NESTED: "OBJECT", 991 exp.DataType.Type.STRUCT: "OBJECT", 992 } 993 994 PROPERTIES_LOCATION = { 995 **generator.Generator.PROPERTIES_LOCATION, 996 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 997 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 998 } 999 1000 UNSUPPORTED_VALUES_EXPRESSIONS = { 1001 exp.Map, 1002 exp.StarMap, 1003 exp.Struct, 1004 exp.VarMap, 1005 } 1006 1007 def with_properties(self, properties: exp.Properties) -> str: 1008 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1009 1010 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1011 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1012 values_as_table = False 1013 1014 return super().values_sql(expression, values_as_table=values_as_table) 1015 1016 def datatype_sql(self, expression: exp.DataType) -> str: 1017 expressions = expression.expressions 1018 if ( 1019 expressions 1020 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1021 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1022 ): 1023 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1024 return "OBJECT" 1025 1026 return super().datatype_sql(expression) 1027 1028 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1029 return self.func( 1030 "TO_NUMBER", 1031 expression.this, 1032 expression.args.get("format"), 1033 expression.args.get("precision"), 1034 expression.args.get("scale"), 1035 ) 1036 1037 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1038 milli = expression.args.get("milli") 1039 if milli is not None: 1040 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1041 expression.set("nano", milli_to_nano) 1042 1043 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1044 1045 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1046 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1047 return self.func("TO_GEOGRAPHY", expression.this) 1048 if expression.is_type(exp.DataType.Type.GEOMETRY): 1049 return self.func("TO_GEOMETRY", expression.this) 1050 1051 return super().cast_sql(expression, safe_prefix=safe_prefix) 1052 1053 def trycast_sql(self, expression: exp.TryCast) -> str: 1054 value = expression.this 1055 1056 if value.type is None: 1057 from sqlglot.optimizer.annotate_types import annotate_types 1058 1059 value = annotate_types(value) 1060 1061 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1062 return super().trycast_sql(expression) 1063 1064 # TRY_CAST only works for 
string values in Snowflake 1065 return self.cast_sql(expression) 1066 1067 def log_sql(self, expression: exp.Log) -> str: 1068 if not expression.expression: 1069 return self.func("LN", expression.this) 1070 1071 return super().log_sql(expression) 1072 1073 def unnest_sql(self, expression: exp.Unnest) -> str: 1074 unnest_alias = expression.args.get("alias") 1075 offset = expression.args.get("offset") 1076 1077 columns = [ 1078 exp.to_identifier("seq"), 1079 exp.to_identifier("key"), 1080 exp.to_identifier("path"), 1081 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1082 seq_get(unnest_alias.columns if unnest_alias else [], 0) 1083 or exp.to_identifier("value"), 1084 exp.to_identifier("this"), 1085 ] 1086 1087 if unnest_alias: 1088 unnest_alias.set("columns", columns) 1089 else: 1090 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1091 1092 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 1093 alias = self.sql(unnest_alias) 1094 alias = f" AS {alias}" if alias else "" 1095 return f"{explode}{alias}" 1096 1097 def show_sql(self, expression: exp.Show) -> str: 1098 terse = "TERSE " if expression.args.get("terse") else "" 1099 history = " HISTORY" if expression.args.get("history") else "" 1100 like = self.sql(expression, "like") 1101 like = f" LIKE {like}" if like else "" 1102 1103 scope = self.sql(expression, "scope") 1104 scope = f" {scope}" if scope else "" 1105 1106 scope_kind = self.sql(expression, "scope_kind") 1107 if scope_kind: 1108 scope_kind = f" IN {scope_kind}" 1109 1110 starts_with = self.sql(expression, "starts_with") 1111 if starts_with: 1112 starts_with = f" STARTS WITH {starts_with}" 1113 1114 limit = self.sql(expression, "limit") 1115 1116 from_ = self.sql(expression, "from") 1117 if from_: 1118 from_ = f" FROM {from_}" 1119 1120 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 1121 1122 def describe_sql(self, expression: exp.Describe) -> str: 1123 # Default to table if kind is unknown 1124 kind_value = expression.args.get("kind") or "TABLE" 1125 kind = f" {kind_value}" if kind_value else "" 1126 this = f" {self.sql(expression, 'this')}" 1127 expressions = self.expressions(expression, flat=True) 1128 expressions = f" {expressions}" if expressions else "" 1129 return f"DESCRIBE{kind}{this}{expressions}" 1130 1131 def generatedasidentitycolumnconstraint_sql( 1132 self, expression: exp.GeneratedAsIdentityColumnConstraint 1133 ) -> str: 1134 start = expression.args.get("start") 1135 start = f" START {start}" if start else "" 1136 increment = expression.args.get("increment") 1137 increment = f" INCREMENT {increment}" if increment else "" 1138 return f"AUTOINCREMENT{start}{increment}" 1139 1140 def cluster_sql(self, expression: exp.Cluster) -> str: 1141 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1142 1143 def struct_sql(self, expression: exp.Struct) -> str: 1144 keys = [] 1145 values = [] 1146 1147 for i, e in enumerate(expression.expressions): 1148 if isinstance(e, exp.PropertyEQ): 1149 keys.append( 1150 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1151 ) 1152 values.append(e.expression) 1153 else: 1154 keys.append(exp.Literal.string(f"_{i}")) 1155 values.append(e) 1156 1157 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1158 1159 @unsupported_args("weight", "accuracy") 1160 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1161 return self.func("APPROX_PERCENTILE", 
expression.this, expression.args.get("quantile")) 1162 1163 def alterset_sql(self, expression: exp.AlterSet) -> str: 1164 exprs = self.expressions(expression, flat=True) 1165 exprs = f" {exprs}" if exprs else "" 1166 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1167 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1168 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1169 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1170 tag = self.expressions(expression, key="tag", flat=True) 1171 tag = f" TAG {tag}" if tag else "" 1172 1173 return f"SET{exprs}{file_format}{copy_options}{tag}" 1174 1175 def strtotime_sql(self, expression: exp.StrToTime): 1176 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1177 return self.func( 1178 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1179 ) 1180 1181 def timestampsub_sql(self, expression: exp.TimestampSub): 1182 return self.sql( 1183 exp.TimestampAdd( 1184 this=expression.this, 1185 expression=expression.expression * -1, 1186 unit=expression.unit, 1187 ) 1188 ) 1189 1190 def jsonextract_sql(self, expression: exp.JSONExtract): 1191 this = expression.this 1192 1193 # JSON strings are valid coming from other dialects such as BQ 1194 return self.func( 1195 "GET_PATH", 1196 exp.ParseJSON(this=this) if this.is_string else this, 1197 expression.expression, 1198 ) 1199 1200 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1201 this = expression.this 1202 if not isinstance(this, exp.TsOrDsToTimestamp): 1203 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1204 1205 return self.func("TO_CHAR", this, self.format_time(expression))
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,

WITH y(c) AS (
  SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
  SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
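A minimal runnable sketch of this rewrite, assuming (as the example suggests) that it is applied by the optimizer's qualify pass rather than by parsing alone; the exact output formatting is an expectation, not a guarantee:

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
# Parse with the Snowflake dialect, then qualify; the HAVING reference to the
# CTE alias column c should resolve to the SUM(a) projection.
expression = sqlglot.parse_one(sql, read="snowflake")
print(qualify(expression, dialect="snowflake").sql(dialect="snowflake"))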
Associates this dialect's time formats with their equivalent Python strftime
formats.
364 def quote_identifier(self, expression: E, identify: bool = True) -> E: 365 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 366 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 367 if ( 368 isinstance(expression, exp.Identifier) 369 and isinstance(expression.parent, exp.Table) 370 and expression.name.lower() == "dual" 371 ): 372 return expression # type: ignore 373 374 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
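A hedged example of the DUAL carve-out implemented above, using an illustrative column name; the output shown is the expected shape, not a guarantee:

import sqlglot

# With identify=True every identifier is quoted, but the unquoted DUAL table
# reference is returned as-is so Snowflake still treats it as the pseudo-table.
print(sqlglot.parse_one("SELECT col FROM DUAL").sql(dialect="snowflake", identify=True))
# Expected shape: SELECT "col" FROM DUAL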
Mapping of an escaped sequence (\\n) to its unescaped version (a literal newline).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- PRESERVE_ORIGINAL_NAMES
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
376 class Parser(parser.Parser): 377 IDENTIFY_PIVOT_STRINGS = True 378 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 379 COLON_IS_VARIANT_EXTRACT = True 380 381 ID_VAR_TOKENS = { 382 *parser.Parser.ID_VAR_TOKENS, 383 TokenType.MATCH_CONDITION, 384 } 385 386 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 387 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 388 389 FUNCTIONS = { 390 **parser.Parser.FUNCTIONS, 391 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 392 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 393 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 394 this=seq_get(args, 1), expression=seq_get(args, 0) 395 ), 396 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 397 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 398 start=seq_get(args, 0), 399 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 400 step=seq_get(args, 2), 401 ), 402 "BITXOR": binary_from_function(exp.BitwiseXor), 403 "BIT_XOR": binary_from_function(exp.BitwiseXor), 404 "BITOR": _build_bitor, 405 "BIT_OR": _build_bitor, 406 "BOOLXOR": binary_from_function(exp.Xor), 407 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 408 "DATE_TRUNC": _date_trunc_to_time, 409 "DATEADD": _build_date_time_add(exp.DateAdd), 410 "DATEDIFF": _build_datediff, 411 "DIV0": _build_if_from_div0, 412 "EDITDISTANCE": lambda args: exp.Levenshtein( 413 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 414 ), 415 "FLATTEN": exp.Explode.from_arg_list, 416 "GET_PATH": lambda args, dialect: exp.JSONExtract( 417 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 418 ), 419 "IFF": exp.If.from_arg_list, 420 "LAST_DAY": lambda args: exp.LastDay( 421 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 422 ), 423 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 424 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 425 "LISTAGG": exp.GroupConcat.from_arg_list, 426 "NULLIFZERO": _build_if_from_nullifzero, 427 "OBJECT_CONSTRUCT": _build_object_construct, 428 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 429 "REGEXP_REPLACE": _build_regexp_replace, 430 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 431 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 432 "RLIKE": exp.RegexpLike.from_arg_list, 433 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 434 "TIMEADD": _build_date_time_add(exp.TimeAdd), 435 "TIMEDIFF": _build_datediff, 436 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 437 "TIMESTAMPDIFF": _build_datediff, 438 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 439 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 440 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 441 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 442 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 443 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 444 "TRY_TO_TIMESTAMP": _build_datetime( 445 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 446 ), 447 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 448 "TO_NUMBER": lambda args: exp.ToNumber( 449 this=seq_get(args, 0), 450 format=seq_get(args, 1), 451 precision=seq_get(args, 2), 452 scale=seq_get(args, 3), 453 ), 454 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 455 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", 
exp.DataType.Type.TIMESTAMP), 456 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 457 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 458 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 459 "TO_VARCHAR": exp.ToChar.from_arg_list, 460 "ZEROIFNULL": _build_if_from_zeroifnull, 461 } 462 463 FUNCTION_PARSERS = { 464 **parser.Parser.FUNCTION_PARSERS, 465 "DATE_PART": lambda self: self._parse_date_part(), 466 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 467 } 468 FUNCTION_PARSERS.pop("TRIM") 469 470 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 471 472 RANGE_PARSERS = { 473 **parser.Parser.RANGE_PARSERS, 474 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 475 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 476 } 477 478 ALTER_PARSERS = { 479 **parser.Parser.ALTER_PARSERS, 480 "UNSET": lambda self: self.expression( 481 exp.Set, 482 tag=self._match_text_seq("TAG"), 483 expressions=self._parse_csv(self._parse_id_var), 484 unset=True, 485 ), 486 } 487 488 STATEMENT_PARSERS = { 489 **parser.Parser.STATEMENT_PARSERS, 490 TokenType.SHOW: lambda self: self._parse_show(), 491 } 492 493 PROPERTY_PARSERS = { 494 **parser.Parser.PROPERTY_PARSERS, 495 "LOCATION": lambda self: self._parse_location_property(), 496 "TAG": lambda self: self._parse_tag(), 497 } 498 499 TYPE_CONVERTERS = { 500 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 501 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 502 } 503 504 SHOW_PARSERS = { 505 "SCHEMAS": _show_parser("SCHEMAS"), 506 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 507 "OBJECTS": _show_parser("OBJECTS"), 508 "TERSE OBJECTS": _show_parser("OBJECTS"), 509 "TABLES": _show_parser("TABLES"), 510 "TERSE TABLES": _show_parser("TABLES"), 511 "VIEWS": _show_parser("VIEWS"), 512 "TERSE VIEWS": _show_parser("VIEWS"), 513 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 514 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 515 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 516 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 517 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 518 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 519 "SEQUENCES": _show_parser("SEQUENCES"), 520 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 521 "COLUMNS": _show_parser("COLUMNS"), 522 "USERS": _show_parser("USERS"), 523 "TERSE USERS": _show_parser("USERS"), 524 } 525 526 CONSTRAINT_PARSERS = { 527 **parser.Parser.CONSTRAINT_PARSERS, 528 "WITH": lambda self: self._parse_with_constraint(), 529 "MASKING": lambda self: self._parse_with_constraint(), 530 "PROJECTION": lambda self: self._parse_with_constraint(), 531 "TAG": lambda self: self._parse_with_constraint(), 532 } 533 534 STAGED_FILE_SINGLE_TOKENS = { 535 TokenType.DOT, 536 TokenType.MOD, 537 TokenType.SLASH, 538 } 539 540 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 541 542 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 543 544 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 545 546 LAMBDAS = { 547 **parser.Parser.LAMBDAS, 548 TokenType.ARROW: lambda self, expressions: self.expression( 549 exp.Lambda, 550 this=self._replace_lambda( 551 self._parse_assignment(), 552 expressions, 553 ), 554 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 555 ), 556 } 557 558 def _negate_range( 559 self, this: 
t.Optional[exp.Expression] = None 560 ) -> t.Optional[exp.Expression]: 561 if not this: 562 return this 563 564 query = this.args.get("query") 565 if isinstance(this, exp.In) and isinstance(query, exp.Query): 566 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 567 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 568 # which can produce different results (most likely a SnowFlake bug). 569 # 570 # https://docs.snowflake.com/en/sql-reference/functions/in 571 # Context: https://github.com/tobymao/sqlglot/issues/3890 572 return self.expression( 573 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 574 ) 575 576 return self.expression(exp.Not, this=this) 577 578 def _parse_tag(self) -> exp.Tags: 579 return self.expression( 580 exp.Tags, 581 expressions=self._parse_wrapped_csv(self._parse_property), 582 ) 583 584 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 585 if self._prev.token_type != TokenType.WITH: 586 self._retreat(self._index - 1) 587 588 if self._match_text_seq("MASKING", "POLICY"): 589 policy = self._parse_column() 590 return self.expression( 591 exp.MaskingPolicyColumnConstraint, 592 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 593 expressions=self._match(TokenType.USING) 594 and self._parse_wrapped_csv(self._parse_id_var), 595 ) 596 if self._match_text_seq("PROJECTION", "POLICY"): 597 policy = self._parse_column() 598 return self.expression( 599 exp.ProjectionPolicyColumnConstraint, 600 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 601 ) 602 if self._match(TokenType.TAG): 603 return self._parse_tag() 604 605 return None 606 607 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 608 if self._match(TokenType.TAG): 609 return self._parse_tag() 610 611 return super()._parse_with_property() 612 613 def _parse_create(self) -> exp.Create | exp.Command: 614 expression = super()._parse_create() 615 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 616 # Replace the Table node with the enclosed Identifier 617 expression.this.replace(expression.this.this) 618 619 return expression 620 621 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 622 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 623 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 624 this = self._parse_var() or self._parse_type() 625 626 if not this: 627 return None 628 629 self._match(TokenType.COMMA) 630 expression = self._parse_bitwise() 631 this = map_date_part(this) 632 name = this.name.upper() 633 634 if name.startswith("EPOCH"): 635 if name == "EPOCH_MILLISECOND": 636 scale = 10**3 637 elif name == "EPOCH_MICROSECOND": 638 scale = 10**6 639 elif name == "EPOCH_NANOSECOND": 640 scale = 10**9 641 else: 642 scale = None 643 644 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 645 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 646 647 if scale: 648 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 649 650 return to_unix 651 652 return self.expression(exp.Extract, this=this, expression=expression) 653 654 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 655 if is_map: 656 # Keys are strings in Snowflake's objects, see also: 657 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 658 # - 
https://docs.snowflake.com/en/sql-reference/functions/object_construct 659 return self._parse_slice(self._parse_string()) 660 661 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 662 663 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 664 lateral = super()._parse_lateral() 665 if not lateral: 666 return lateral 667 668 if isinstance(lateral.this, exp.Explode): 669 table_alias = lateral.args.get("alias") 670 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 671 if table_alias and not table_alias.args.get("columns"): 672 table_alias.set("columns", columns) 673 elif not table_alias: 674 exp.alias_(lateral, "_flattened", table=columns, copy=False) 675 676 return lateral 677 678 def _parse_table_parts( 679 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 680 ) -> exp.Table: 681 # https://docs.snowflake.com/en/user-guide/querying-stage 682 if self._match(TokenType.STRING, advance=False): 683 table = self._parse_string() 684 elif self._match_text_seq("@", advance=False): 685 table = self._parse_location_path() 686 else: 687 table = None 688 689 if table: 690 file_format = None 691 pattern = None 692 693 wrapped = self._match(TokenType.L_PAREN) 694 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 695 if self._match_text_seq("FILE_FORMAT", "=>"): 696 file_format = self._parse_string() or super()._parse_table_parts( 697 is_db_reference=is_db_reference 698 ) 699 elif self._match_text_seq("PATTERN", "=>"): 700 pattern = self._parse_string() 701 else: 702 break 703 704 self._match(TokenType.COMMA) 705 706 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 707 else: 708 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 709 710 return table 711 712 def _parse_id_var( 713 self, 714 any_token: bool = True, 715 tokens: t.Optional[t.Collection[TokenType]] = None, 716 ) -> t.Optional[exp.Expression]: 717 if self._match_text_seq("IDENTIFIER", "("): 718 identifier = ( 719 super()._parse_id_var(any_token=any_token, tokens=tokens) 720 or self._parse_string() 721 ) 722 self._match_r_paren() 723 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 724 725 return super()._parse_id_var(any_token=any_token, tokens=tokens) 726 727 def _parse_show_snowflake(self, this: str) -> exp.Show: 728 scope = None 729 scope_kind = None 730 731 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 732 # which is syntactically valid but has no effect on the output 733 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 734 735 history = self._match_text_seq("HISTORY") 736 737 like = self._parse_string() if self._match(TokenType.LIKE) else None 738 739 if self._match(TokenType.IN): 740 if self._match_text_seq("ACCOUNT"): 741 scope_kind = "ACCOUNT" 742 elif self._match_set(self.DB_CREATABLES): 743 scope_kind = self._prev.text.upper() 744 if self._curr: 745 scope = self._parse_table_parts() 746 elif self._curr: 747 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 748 scope = self._parse_table_parts() 749 750 return self.expression( 751 exp.Show, 752 **{ 753 "terse": terse, 754 "this": this, 755 "history": history, 756 "like": like, 757 "scope": scope, 758 "scope_kind": scope_kind, 759 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 760 "limit": self._parse_limit(), 761 "from": self._parse_string() if self._match(TokenType.FROM) else None, 762 }, 763 ) 764 765 def 
_parse_location_property(self) -> exp.LocationProperty: 766 self._match(TokenType.EQ) 767 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 768 769 def _parse_file_location(self) -> t.Optional[exp.Expression]: 770 # Parse either a subquery or a staged file 771 return ( 772 self._parse_select(table=True, parse_subquery_alias=False) 773 if self._match(TokenType.L_PAREN, advance=False) 774 else self._parse_table_parts() 775 ) 776 777 def _parse_location_path(self) -> exp.Var: 778 parts = [self._advance_any(ignore_reserved=True)] 779 780 # We avoid consuming a comma token because external tables like @foo and @bar 781 # can be joined in a query with a comma separator, as well as closing paren 782 # in case of subqueries 783 while self._is_connected() and not self._match_set( 784 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 785 ): 786 parts.append(self._advance_any(ignore_reserved=True)) 787 788 return exp.var("".join(part.text for part in parts if part)) 789 790 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 791 this = super()._parse_lambda_arg() 792 793 if not this: 794 return this 795 796 typ = self._parse_types() 797 798 if typ: 799 return self.expression(exp.Cast, this=this, to=typ) 800 801 return this 802 803 def _parse_foreign_key(self) -> exp.ForeignKey: 804 # inlineFK, the REFERENCES columns are implied 805 if self._match(TokenType.REFERENCES, advance=False): 806 return self.expression(exp.ForeignKey) 807 808 # outoflineFK, explicitly names the columns 809 return super()._parse_foreign_key()
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
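A short sketch of these options in use, with illustrative table and column names; ErrorLevel.RAISE and the IFF-to-exp.If mapping follow from the parser tables shown above:

import sqlglot
from sqlglot.errors import ErrorLevel

# Parser options such as error_level are forwarded by parse_one; Snowflake's
# FUNCTIONS table parses IFF(...) into an exp.If node.
ast = sqlglot.parse_one(
    "SELECT IFF(x > 0, 'pos', 'non-pos') FROM t",
    read="snowflake",
    error_level=ErrorLevel.RAISE,
)
print(ast.find(sqlglot.exp.If))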
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
811 class Tokenizer(tokens.Tokenizer): 812 STRING_ESCAPES = ["\\", "'"] 813 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 814 RAW_STRINGS = ["$$"] 815 COMMENTS = ["--", "//", ("/*", "*/")] 816 NESTED_COMMENTS = False 817 818 KEYWORDS = { 819 **tokens.Tokenizer.KEYWORDS, 820 "BYTEINT": TokenType.INT, 821 "CHAR VARYING": TokenType.VARCHAR, 822 "CHARACTER VARYING": TokenType.VARCHAR, 823 "EXCLUDE": TokenType.EXCEPT, 824 "ILIKE ANY": TokenType.ILIKE_ANY, 825 "LIKE ANY": TokenType.LIKE_ANY, 826 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 827 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 828 "MINUS": TokenType.EXCEPT, 829 "NCHAR VARYING": TokenType.VARCHAR, 830 "PUT": TokenType.COMMAND, 831 "REMOVE": TokenType.COMMAND, 832 "RM": TokenType.COMMAND, 833 "SAMPLE": TokenType.TABLE_SAMPLE, 834 "SQL_DOUBLE": TokenType.DOUBLE, 835 "SQL_VARCHAR": TokenType.VARCHAR, 836 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 837 "TAG": TokenType.TAG, 838 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 839 "TOP": TokenType.TOP, 840 "WAREHOUSE": TokenType.WAREHOUSE, 841 "STREAMLIT": TokenType.STREAMLIT, 842 } 843 KEYWORDS.pop("/*+") 844 845 SINGLE_TOKENS = { 846 **tokens.Tokenizer.SINGLE_TOKENS, 847 "$": TokenType.PARAMETER, 848 } 849 850 VAR_SINGLE_TOKENS = {"$"} 851 852 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
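A small sketch of two tokenizer settings above ($$ raw strings and // line comments); the input string is illustrative and the printed token stream is an expectation:

from sqlglot.dialects.snowflake import Snowflake

# $$...$$ is tokenized as a single raw string (backslashes are not escapes
# there), and // starts a line comment that produces no token of its own.
tokens = Snowflake().tokenize("SELECT $$C:\\new\\path$$ // trailing comment")
print([(token.token_type.name, token.text) for token in tokens])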
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
854 class Generator(generator.Generator): 855 PARAMETER_TOKEN = "$" 856 MATCHED_BY_SOURCE = False 857 SINGLE_STRING_INTERVAL = True 858 JOIN_HINTS = False 859 TABLE_HINTS = False 860 QUERY_HINTS = False 861 AGGREGATE_FILTER_SUPPORTED = False 862 SUPPORTS_TABLE_COPY = False 863 COLLATE_IS_FUNC = True 864 LIMIT_ONLY_LITERALS = True 865 JSON_KEY_VALUE_PAIR_SEP = "," 866 INSERT_OVERWRITE = " OVERWRITE INTO" 867 STRUCT_DELIMITER = ("(", ")") 868 COPY_PARAMS_ARE_WRAPPED = False 869 COPY_PARAMS_EQ_REQUIRED = True 870 STAR_EXCEPT = "EXCLUDE" 871 SUPPORTS_EXPLODING_PROJECTIONS = False 872 ARRAY_CONCAT_IS_VAR_LEN = False 873 SUPPORTS_CONVERT_TIMEZONE = True 874 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 875 SUPPORTS_MEDIAN = True 876 ARRAY_SIZE_NAME = "ARRAY_SIZE" 877 878 TRANSFORMS = { 879 **generator.Generator.TRANSFORMS, 880 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 881 exp.ArgMax: rename_func("MAX_BY"), 882 exp.ArgMin: rename_func("MIN_BY"), 883 exp.Array: inline_array_sql, 884 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 885 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 886 exp.AtTimeZone: lambda self, e: self.func( 887 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 888 ), 889 exp.BitwiseXor: rename_func("BITXOR"), 890 exp.BitwiseOr: rename_func("BITOR"), 891 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 892 exp.DateAdd: date_delta_sql("DATEADD"), 893 exp.DateDiff: date_delta_sql("DATEDIFF"), 894 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 895 exp.DatetimeDiff: timestampdiff_sql, 896 exp.DateStrToDate: datestrtodate_sql, 897 exp.DayOfMonth: rename_func("DAYOFMONTH"), 898 exp.DayOfWeek: rename_func("DAYOFWEEK"), 899 exp.DayOfYear: rename_func("DAYOFYEAR"), 900 exp.Explode: rename_func("FLATTEN"), 901 exp.Extract: rename_func("DATE_PART"), 902 exp.FromTimeZone: lambda self, e: self.func( 903 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 904 ), 905 exp.GenerateSeries: lambda self, e: self.func( 906 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 907 ), 908 exp.GroupConcat: rename_func("LISTAGG"), 909 exp.If: if_sql(name="IFF", false_value="NULL"), 910 exp.JSONExtractArray: _json_extract_value_array_sql, 911 exp.JSONExtractScalar: lambda self, e: self.func( 912 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 913 ), 914 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 915 exp.JSONPathRoot: lambda *_: "", 916 exp.JSONValueArray: _json_extract_value_array_sql, 917 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 918 exp.LogicalOr: rename_func("BOOLOR_AGG"), 919 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 920 exp.MakeInterval: no_make_interval_sql, 921 exp.Max: max_or_greatest, 922 exp.Min: min_or_least, 923 exp.ParseJSON: lambda self, e: self.func( 924 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 925 ), 926 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 927 exp.PercentileCont: transforms.preprocess( 928 [transforms.add_within_group_for_percentiles] 929 ), 930 exp.PercentileDisc: transforms.preprocess( 931 [transforms.add_within_group_for_percentiles] 932 ), 933 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 934 exp.RegexpExtract: _regexpextract_sql, 935 exp.RegexpExtractAll: _regexpextract_sql, 936 exp.RegexpILike: _regexpilike_sql, 937 exp.Rand: rename_func("RANDOM"), 938 exp.Select: transforms.preprocess( 939 
[ 940 transforms.eliminate_distinct_on, 941 transforms.explode_to_unnest(), 942 transforms.eliminate_semi_and_anti_joins, 943 _transform_generate_date_array, 944 ] 945 ), 946 exp.SafeDivide: lambda self, e: no_safe_divide_sql(self, e, "IFF"), 947 exp.SHA: rename_func("SHA1"), 948 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 949 exp.StartsWith: rename_func("STARTSWITH"), 950 exp.StrPosition: lambda self, e: self.func( 951 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 952 ), 953 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 954 exp.Stuff: rename_func("INSERT"), 955 exp.TimeAdd: date_delta_sql("TIMEADD"), 956 exp.Timestamp: no_timestamp_sql, 957 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 958 exp.TimestampDiff: lambda self, e: self.func( 959 "TIMESTAMPDIFF", e.unit, e.expression, e.this 960 ), 961 exp.TimestampTrunc: timestamptrunc_sql(), 962 exp.TimeStrToTime: timestrtotime_sql, 963 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 964 exp.ToArray: rename_func("TO_ARRAY"), 965 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 966 exp.ToDouble: rename_func("TO_DOUBLE"), 967 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 968 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 969 exp.TsOrDsToDate: lambda self, e: self.func( 970 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 971 ), 972 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 973 exp.Uuid: rename_func("UUID_STRING"), 974 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 975 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 976 exp.Xor: rename_func("BOOLXOR"), 977 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 978 rename_func("EDITDISTANCE") 979 ), 980 } 981 982 SUPPORTED_JSON_PATH_PARTS = { 983 exp.JSONPathKey, 984 exp.JSONPathRoot, 985 exp.JSONPathSubscript, 986 } 987 988 TYPE_MAPPING = { 989 **generator.Generator.TYPE_MAPPING, 990 exp.DataType.Type.NESTED: "OBJECT", 991 exp.DataType.Type.STRUCT: "OBJECT", 992 } 993 994 PROPERTIES_LOCATION = { 995 **generator.Generator.PROPERTIES_LOCATION, 996 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 997 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 998 } 999 1000 UNSUPPORTED_VALUES_EXPRESSIONS = { 1001 exp.Map, 1002 exp.StarMap, 1003 exp.Struct, 1004 exp.VarMap, 1005 } 1006 1007 def with_properties(self, properties: exp.Properties) -> str: 1008 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1009 1010 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1011 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1012 values_as_table = False 1013 1014 return super().values_sql(expression, values_as_table=values_as_table) 1015 1016 def datatype_sql(self, expression: exp.DataType) -> str: 1017 expressions = expression.expressions 1018 if ( 1019 expressions 1020 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1021 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1022 ): 1023 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1024 return "OBJECT" 1025 1026 return super().datatype_sql(expression) 1027 1028 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1029 return self.func( 1030 "TO_NUMBER", 1031 expression.this, 1032 expression.args.get("format"), 1033 expression.args.get("precision"), 1034 expression.args.get("scale"), 1035 ) 1036 1037 def timestampfromparts_sql(self, expression: 
exp.TimestampFromParts) -> str: 1038 milli = expression.args.get("milli") 1039 if milli is not None: 1040 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1041 expression.set("nano", milli_to_nano) 1042 1043 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1044 1045 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1046 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1047 return self.func("TO_GEOGRAPHY", expression.this) 1048 if expression.is_type(exp.DataType.Type.GEOMETRY): 1049 return self.func("TO_GEOMETRY", expression.this) 1050 1051 return super().cast_sql(expression, safe_prefix=safe_prefix) 1052 1053 def trycast_sql(self, expression: exp.TryCast) -> str: 1054 value = expression.this 1055 1056 if value.type is None: 1057 from sqlglot.optimizer.annotate_types import annotate_types 1058 1059 value = annotate_types(value) 1060 1061 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1062 return super().trycast_sql(expression) 1063 1064 # TRY_CAST only works for string values in Snowflake 1065 return self.cast_sql(expression) 1066 1067 def log_sql(self, expression: exp.Log) -> str: 1068 if not expression.expression: 1069 return self.func("LN", expression.this) 1070 1071 return super().log_sql(expression) 1072 1073 def unnest_sql(self, expression: exp.Unnest) -> str: 1074 unnest_alias = expression.args.get("alias") 1075 offset = expression.args.get("offset") 1076 1077 columns = [ 1078 exp.to_identifier("seq"), 1079 exp.to_identifier("key"), 1080 exp.to_identifier("path"), 1081 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1082 seq_get(unnest_alias.columns if unnest_alias else [], 0) 1083 or exp.to_identifier("value"), 1084 exp.to_identifier("this"), 1085 ] 1086 1087 if unnest_alias: 1088 unnest_alias.set("columns", columns) 1089 else: 1090 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1091 1092 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 1093 alias = self.sql(unnest_alias) 1094 alias = f" AS {alias}" if alias else "" 1095 return f"{explode}{alias}" 1096 1097 def show_sql(self, expression: exp.Show) -> str: 1098 terse = "TERSE " if expression.args.get("terse") else "" 1099 history = " HISTORY" if expression.args.get("history") else "" 1100 like = self.sql(expression, "like") 1101 like = f" LIKE {like}" if like else "" 1102 1103 scope = self.sql(expression, "scope") 1104 scope = f" {scope}" if scope else "" 1105 1106 scope_kind = self.sql(expression, "scope_kind") 1107 if scope_kind: 1108 scope_kind = f" IN {scope_kind}" 1109 1110 starts_with = self.sql(expression, "starts_with") 1111 if starts_with: 1112 starts_with = f" STARTS WITH {starts_with}" 1113 1114 limit = self.sql(expression, "limit") 1115 1116 from_ = self.sql(expression, "from") 1117 if from_: 1118 from_ = f" FROM {from_}" 1119 1120 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 1121 1122 def describe_sql(self, expression: exp.Describe) -> str: 1123 # Default to table if kind is unknown 1124 kind_value = expression.args.get("kind") or "TABLE" 1125 kind = f" {kind_value}" if kind_value else "" 1126 this = f" {self.sql(expression, 'this')}" 1127 expressions = self.expressions(expression, flat=True) 1128 expressions = f" {expressions}" if expressions else "" 1129 return f"DESCRIBE{kind}{this}{expressions}" 1130 1131 def generatedasidentitycolumnconstraint_sql( 1132 self, expression: 
exp.GeneratedAsIdentityColumnConstraint 1133 ) -> str: 1134 start = expression.args.get("start") 1135 start = f" START {start}" if start else "" 1136 increment = expression.args.get("increment") 1137 increment = f" INCREMENT {increment}" if increment else "" 1138 return f"AUTOINCREMENT{start}{increment}" 1139 1140 def cluster_sql(self, expression: exp.Cluster) -> str: 1141 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1142 1143 def struct_sql(self, expression: exp.Struct) -> str: 1144 keys = [] 1145 values = [] 1146 1147 for i, e in enumerate(expression.expressions): 1148 if isinstance(e, exp.PropertyEQ): 1149 keys.append( 1150 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1151 ) 1152 values.append(e.expression) 1153 else: 1154 keys.append(exp.Literal.string(f"_{i}")) 1155 values.append(e) 1156 1157 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1158 1159 @unsupported_args("weight", "accuracy") 1160 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1161 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1162 1163 def alterset_sql(self, expression: exp.AlterSet) -> str: 1164 exprs = self.expressions(expression, flat=True) 1165 exprs = f" {exprs}" if exprs else "" 1166 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1167 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1168 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1169 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1170 tag = self.expressions(expression, key="tag", flat=True) 1171 tag = f" TAG {tag}" if tag else "" 1172 1173 return f"SET{exprs}{file_format}{copy_options}{tag}" 1174 1175 def strtotime_sql(self, expression: exp.StrToTime): 1176 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1177 return self.func( 1178 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1179 ) 1180 1181 def timestampsub_sql(self, expression: exp.TimestampSub): 1182 return self.sql( 1183 exp.TimestampAdd( 1184 this=expression.this, 1185 expression=expression.expression * -1, 1186 unit=expression.unit, 1187 ) 1188 ) 1189 1190 def jsonextract_sql(self, expression: exp.JSONExtract): 1191 this = expression.this 1192 1193 # JSON strings are valid coming from other dialects such as BQ 1194 return self.func( 1195 "GET_PATH", 1196 exp.ParseJSON(this=this) if this.is_string else this, 1197 expression.expression, 1198 ) 1199 1200 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1201 this = expression.this 1202 if not isinstance(this, exp.TsOrDsToTimestamp): 1203 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1204 1205 return self.func("TO_CHAR", this, self.format_time(expression))
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
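A brief sketch of the pretty and identify options described above, on an illustrative query:

import sqlglot

# pretty=True applies the default indent of 2; identify=True force-quotes
# identifiers per the Snowflake generator's quoting rules.
print(sqlglot.transpile(
    "SELECT a, b FROM t WHERE a > 1",
    write="snowflake",
    pretty=True,
    identify=True,
)[0])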
1016 def datatype_sql(self, expression: exp.DataType) -> str: 1017 expressions = expression.expressions 1018 if ( 1019 expressions 1020 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1021 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1022 ): 1023 # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ] 1024 return "OBJECT" 1025 1026 return super().datatype_sql(expression)
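A hedged example of the OBJECT fallback above, fed a BigQuery-style typed struct; the output line is the expected shape:

import sqlglot

# A parametrized STRUCT type has no direct Snowflake syntax outside Iceberg
# tables, so the type collapses to plain OBJECT.
print(sqlglot.transpile("SELECT CAST(col AS STRUCT<a INT>)", read="bigquery", write="snowflake")[0])
# Expected shape: SELECT CAST(col AS OBJECT)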
1037 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1038 milli = expression.args.get("milli") 1039 if milli is not None: 1040 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1041 expression.set("nano", milli_to_nano) 1042 1043 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
1045 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1046 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1047 return self.func("TO_GEOGRAPHY", expression.this) 1048 if expression.is_type(exp.DataType.Type.GEOMETRY): 1049 return self.func("TO_GEOMETRY", expression.this) 1050 1051 return super().cast_sql(expression, safe_prefix=safe_prefix)
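A short example of the spatial-type special case in cast_sql, with an illustrative column name:

import sqlglot

# Casts to GEOGRAPHY (and GEOMETRY) are rendered through Snowflake's
# conversion functions instead of CAST syntax.
print(sqlglot.transpile("SELECT CAST(col AS GEOGRAPHY)", write="snowflake")[0])
# Expected shape: SELECT TO_GEOGRAPHY(col)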
1053 def trycast_sql(self, expression: exp.TryCast) -> str: 1054 value = expression.this 1055 1056 if value.type is None: 1057 from sqlglot.optimizer.annotate_types import annotate_types 1058 1059 value = annotate_types(value) 1060 1061 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1062 return super().trycast_sql(expression) 1063 1064 # TRY_CAST only works for string values in Snowflake 1065 return self.cast_sql(expression)
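A hedged sketch of trycast_sql's behavior on two contrasting inputs:

import sqlglot

# TRY_CAST survives when the operand is a string (or of unknown type); for a
# non-string operand it degrades to a plain CAST, per the comment above.
print(sqlglot.transpile("SELECT TRY_CAST('33' AS INT)", write="snowflake")[0])
print(sqlglot.transpile("SELECT TRY_CAST(33 AS TEXT)", write="snowflake")[0])
# Expected: the first keeps TRY_CAST, the second becomes a plain CAST.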
1073 def unnest_sql(self, expression: exp.Unnest) -> str: 1074 unnest_alias = expression.args.get("alias") 1075 offset = expression.args.get("offset") 1076 1077 columns = [ 1078 exp.to_identifier("seq"), 1079 exp.to_identifier("key"), 1080 exp.to_identifier("path"), 1081 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1082 seq_get(unnest_alias.columns if unnest_alias else [], 0) 1083 or exp.to_identifier("value"), 1084 exp.to_identifier("this"), 1085 ] 1086 1087 if unnest_alias: 1088 unnest_alias.set("columns", columns) 1089 else: 1090 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1091 1092 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 1093 alias = self.sql(unnest_alias) 1094 alias = f" AS {alias}" if alias else "" 1095 return f"{explode}{alias}"
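A hedged example of unnest_sql, reading a BigQuery UNNEST over an array literal; the output line is the expected shape:

import sqlglot

# UNNEST becomes TABLE(FLATTEN(INPUT => ...)) with the canonical six-column
# alias (seq, key, path, index, value, this) built above.
print(sqlglot.transpile("SELECT * FROM UNNEST([1, 2])", read="bigquery", write="snowflake")[0])
# Expected shape:
# SELECT * FROM TABLE(FLATTEN(INPUT => [1, 2])) AS _u(seq, key, path, index, value, this)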
1097 def show_sql(self, expression: exp.Show) -> str: 1098 terse = "TERSE " if expression.args.get("terse") else "" 1099 history = " HISTORY" if expression.args.get("history") else "" 1100 like = self.sql(expression, "like") 1101 like = f" LIKE {like}" if like else "" 1102 1103 scope = self.sql(expression, "scope") 1104 scope = f" {scope}" if scope else "" 1105 1106 scope_kind = self.sql(expression, "scope_kind") 1107 if scope_kind: 1108 scope_kind = f" IN {scope_kind}" 1109 1110 starts_with = self.sql(expression, "starts_with") 1111 if starts_with: 1112 starts_with = f" STARTS WITH {starts_with}" 1113 1114 limit = self.sql(expression, "limit") 1115 1116 from_ = self.sql(expression, "from") 1117 if from_: 1118 from_ = f" FROM {from_}" 1119 1120 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
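A round-trip sketch of the SHOW support, with illustrative database and schema names:

import sqlglot

# SHOW is parsed into exp.Show by _parse_show_snowflake and re-rendered here,
# preserving TERSE, the IN scope, STARTS WITH and LIMIT.
ast = sqlglot.parse_one(
    "SHOW TERSE TABLES IN SCHEMA db1.s1 STARTS WITH 'ord' LIMIT 10",
    read="snowflake",
)
print(ast.sql(dialect="snowflake"))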
def describe_sql(self, expression: exp.Describe) -> str:
    # Default to table if kind is unknown
    kind_value = expression.args.get("kind") or "TABLE"
    kind = f" {kind_value}" if kind_value else ""
    this = f" {self.sql(expression, 'this')}"
    expressions = self.expressions(expression, flat=True)
    expressions = f" {expressions}" if expressions else ""
    return f"DESCRIBE{kind}{this}{expressions}"
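When the parsed statement carries no object kind, TABLE is assumed on output. A sketch:

import sqlglot

# DESCRIBE with no explicit kind should default to TABLE when regenerated.
print(sqlglot.transpile("DESCRIBE my_table", read="snowflake", write="snowflake")[0])
# Expected shape: DESCRIBE TABLE my_table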
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    start = expression.args.get("start")
    start = f" START {start}" if start else ""
    increment = expression.args.get("increment")
    increment = f" INCREMENT {increment}" if increment else ""
    return f"AUTOINCREMENT{start}{increment}"
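Identity columns are expressed with Snowflake's AUTOINCREMENT keyword, carrying over the start and increment values. A sketch, assuming a PostgreSQL source:

import sqlglot

# GENERATED ... AS IDENTITY becomes AUTOINCREMENT START <n> INCREMENT <n>.
print(sqlglot.transpile(
    "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2))",
    read="postgres",
    write="snowflake",
)[0])
# Expected shape: CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 2)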
def struct_sql(self, expression: exp.Struct) -> str:
    keys = []
    values = []

    for i, e in enumerate(expression.expressions):
        if isinstance(e, exp.PropertyEQ):
            keys.append(
                exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
            )
            values.append(e.expression)
        else:
            keys.append(exp.Literal.string(f"_{i}"))
            values.append(e)

    return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
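Struct literals map onto OBJECT_CONSTRUCT with alternating keys and values; an unnamed field gets a positional key such as '_0'. A sketch, assuming a BigQuery source:

import sqlglot

# Named fields keep their names as string keys; an unnamed field would be
# keyed as '_<position>' instead.
print(sqlglot.transpile("SELECT STRUCT(1 AS a, 2 AS b)", read="bigquery", write="snowflake")[0])
# Expected shape: SELECT OBJECT_CONSTRUCT('a', 1, 'b', 2)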
def alterset_sql(self, expression: exp.AlterSet) -> str:
    exprs = self.expressions(expression, flat=True)
    exprs = f" {exprs}" if exprs else ""
    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
    tag = self.expressions(expression, key="tag", flat=True)
    tag = f" TAG {tag}" if tag else ""

    return f"SET{exprs}{file_format}{copy_options}{tag}"
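ALTER ... SET clauses, including Snowflake-specific TAG assignments, should round-trip through the dialect. A sketch:

import sqlglot

# A tag assignment inside ALTER TABLE ... SET is carried through; exact
# spacing around '=' may differ in the output.
print(sqlglot.transpile("ALTER TABLE t SET TAG dept='finance'", read="snowflake", write="snowflake")[0])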
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql