sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    build_timetostr_or_tochar,
    binary_from_function,
    build_default_decimal_type,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_timestamp_sql,
    strposition_sql,
    timestampdiff_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind))
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

        if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value:
            klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
            formatted_exp = build_formatted_time(klass, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder
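
# Example (illustrative sketch, not part of the original module): Snowflake's
# DATEDIFF(<unit>, <start>, <end>) takes the unit first, so _build_datediff
# maps args[2] to `this` (the end) and args[1] to `expression` (the start).
# Assuming sqlglot's public parse_one API:
#
#     >>> from sqlglot import exp, parse_one
#     >>> diff = parse_one("SELECT DATEDIFF(day, a, b)", read="snowflake").find(exp.DateDiff)
#     >>> diff.this.name, diff.expression.name
#     ('b', 'a')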


def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)

        return binary_from_function(expr_type)(args)

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc
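
# Example (illustrative sketch, not part of the original module): per
# _build_if_from_div0 above, DIV0 is parsed into a conditional rather than a
# plain function call. Assuming sqlglot's public API:
#
#     >>> from sqlglot import exp, parse_one
#     >>> isinstance(parse_one("SELECT DIV0(a, b)", read="snowflake").selects[0], exp.If)
#     True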


def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. Same goes for ANY ORDER BY <column>.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot):
        if expression.unpivot:
            expression = transforms.unqualify_columns(expression)
        else:
            for field in expression.fields:
                field_expr = seq_get(field.expressions if field else [], 0)

                if isinstance(field_expr, exp.PivotAny):
                    unqualified_field_expr = transforms.unqualify_columns(field_expr)
                    t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    ).as_(sequence_value_name)

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])
    unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))
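
# Example (illustrative sketch, not part of the original module): outside of
# Iceberg tables, nested type parameters are dropped by
# _flatten_structured_types_unless_iceberg, so a round-trip through the
# Snowflake dialect plausibly yields:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("CREATE TABLE t (c ARRAY(INT))", read="snowflake", write="snowflake")[0]
#     'CREATE TABLE t (c ARRAY)'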


def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into ARRAY_LENGTH),
            # the transformed Snowflake query is the following (it'll be unnested properly on the
            # next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression


def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF6": "%f",
        "ff6": "%f",
    }
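
    # Example (illustrative sketch, not part of the original module):
    # TIME_MAPPING translates Snowflake format tokens into strftime-style
    # directives while parsing, e.g.:
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> e = parse_one("SELECT TO_TIMESTAMP(x, 'YYYY-MM-DD')", read="snowflake")
    #     >>> e.find(exp.StrToTime).args["format"].name
    #     '%Y-%m-%d'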

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")
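
    # Example (illustrative sketch, not part of the original module): with
    # identify=True, quote_identifier above quotes ordinary identifiers but
    # leaves the special DUAL table bare, so plausibly:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT a FROM DUAL", read="snowflake").sql("snowflake", identify=True)
    #     'SELECT "a" FROM DUAL'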

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_CHAR": build_timetostr_or_tochar,
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
            "LISTAGG": lambda self: self._parse_string_agg(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.PUT: lambda self: self._parse_put(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "CREDENTIALS": lambda self: self._parse_credentials_property(),
            "FILE_FORMAT": lambda self: self._parse_file_format_property(),
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
            "USING": lambda self: self._match_text_seq("TEMPLATE")
            and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }
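
        # Example (illustrative sketch, not part of the original module): the
        # DECIMAL converter above fills in Snowflake's default NUMBER precision
        # and scale when none are given, so plausibly:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("CREATE TABLE t (c DECIMAL)", read="snowflake", write="snowflake")[0]
        #     'CREATE TABLE t (c DECIMAL(38, 0))'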
"TERSE TABLES": _show_parser("TABLES"), 543 "VIEWS": _show_parser("VIEWS"), 544 "TERSE VIEWS": _show_parser("VIEWS"), 545 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 546 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 547 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 548 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 549 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 550 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 551 "SEQUENCES": _show_parser("SEQUENCES"), 552 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 553 "STAGES": _show_parser("STAGES"), 554 "COLUMNS": _show_parser("COLUMNS"), 555 "USERS": _show_parser("USERS"), 556 "TERSE USERS": _show_parser("USERS"), 557 "FILE FORMATS": _show_parser("FILE FORMATS"), 558 "FUNCTIONS": _show_parser("FUNCTIONS"), 559 "PROCEDURES": _show_parser("PROCEDURES"), 560 "WAREHOUSES": _show_parser("WAREHOUSES"), 561 } 562 563 CONSTRAINT_PARSERS = { 564 **parser.Parser.CONSTRAINT_PARSERS, 565 "WITH": lambda self: self._parse_with_constraint(), 566 "MASKING": lambda self: self._parse_with_constraint(), 567 "PROJECTION": lambda self: self._parse_with_constraint(), 568 "TAG": lambda self: self._parse_with_constraint(), 569 } 570 571 STAGED_FILE_SINGLE_TOKENS = { 572 TokenType.DOT, 573 TokenType.MOD, 574 TokenType.SLASH, 575 } 576 577 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 578 579 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 580 581 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 582 583 LAMBDAS = { 584 **parser.Parser.LAMBDAS, 585 TokenType.ARROW: lambda self, expressions: self.expression( 586 exp.Lambda, 587 this=self._replace_lambda( 588 self._parse_assignment(), 589 expressions, 590 ), 591 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 592 ), 593 } 594 595 def _parse_use(self) -> exp.Use: 596 if self._match_text_seq("SECONDARY", "ROLES"): 597 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 598 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 599 return self.expression( 600 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 601 ) 602 603 return super()._parse_use() 604 605 def _negate_range( 606 self, this: t.Optional[exp.Expression] = None 607 ) -> t.Optional[exp.Expression]: 608 if not this: 609 return this 610 611 query = this.args.get("query") 612 if isinstance(this, exp.In) and isinstance(query, exp.Query): 613 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 614 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 615 # which can produce different results (most likely a SnowFlake bug). 

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))
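
        # Example (illustrative sketch, not part of the original module): per
        # _negate_range above, NOT IN over a subquery is parsed as an
        # inequality against ALL, so a round-trip plausibly produces:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT * FROM t WHERE a NOT IN (SELECT b FROM u)", read="snowflake", write="snowflake")[0]
        #     'SELECT * FROM t WHERE a <> ALL (SELECT b FROM u)'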

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)
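
        # Example (illustrative sketch, not part of the original module):
        # IDENTIFIER(...) is preserved as an anonymous function by
        # _parse_id_var above, so it round-trips:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT * FROM IDENTIFIER('my_table')", read="snowflake", write="snowflake")[0]
        #     "SELECT * FROM IDENTIFIER('my_table')"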

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS,
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
            elif self._curr:
                scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                    "privileges": self._match_text_seq("WITH", "PRIVILEGES")
                    and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
                },
            )

        def _parse_put(self) -> exp.Put | exp.Command:
            if self._curr.token_type != TokenType.STRING:
                return self._parse_as_command(self._prev)

            return self.expression(
                exp.Put,
                this=self._parse_string(),
                target=self._parse_location_path(),
                properties=self._parse_properties(),
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            start = self._curr
            self._advance_any(ignore_reserved=True)

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as a closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                self._advance_any(ignore_reserved=True)

            return exp.var(self._find_sql(start, self._prev))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # Inline FK: the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # Out-of-line FK: explicitly names the columns
            return super()._parse_foreign_key()

        def _parse_file_format_property(self) -> exp.FileFormatProperty:
            self._match(TokenType.EQ)
            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_options()
            else:
                expressions = [self._parse_format_name()]

            return self.expression(
                exp.FileFormatProperty,
                expressions=expressions,
            )

        def _parse_credentials_property(self) -> exp.CredentialsProperty:
            return self.expression(
                exp.CredentialsProperty,
                expressions=self._parse_wrapped_options(),
            )
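
        # Example (illustrative sketch, not part of the original module):
        # staged file references parse into table expressions via
        # _parse_table_parts/_parse_location_path above, so plausibly:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT * FROM @mystage", read="snowflake", write="snowflake")[0]
        #     'SELECT * FROM @mystage'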

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "FILE://": TokenType.URI_START,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STAGE": TokenType.STAGE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"
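
        # Example (illustrative sketch, not part of the original module):
        # STAR_EXCEPT above makes the generator spell BigQuery's
        # SELECT * EXCEPT as Snowflake's EXCLUDE:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT * EXCEPT (c) FROM t", read="bigquery", write="snowflake")[0]
        #     'SELECT * EXCLUDE (c) FROM t'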

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FileFormatProperty: lambda self,
            e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
            exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }
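
        # Example (illustrative sketch, not part of the original module): only
        # keys, subscripts and the root are supported JSON path parts, so a
        # GET_PATH expression plausibly round-trips unchanged:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT GET_PATH(j, 'a.b[0]')", read="snowflake", write="snowflake")[0]
        #     "SELECT GET_PATH(j, 'a.b[0]')"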

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)
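
        # Example (illustrative sketch, not part of the original module):
        # cast_sql above rewrites geospatial casts into conversion functions:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT CAST(x AS GEOGRAPHY)", read="snowflake", write="snowflake")[0]
        #     'SELECT TO_GEOGRAPHY(x)'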

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            explode = f"TABLE(FLATTEN({table_input}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            privileges = self.expressions(expression, key="privileges", flat=True)
            privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        @unsupported_args("weight", "accuracy")
        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
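
        # Example (illustrative sketch, not part of the original module):
        # struct_sql above lowers structs into OBJECT_CONSTRUCT key/value
        # pairs, e.g. when transpiling a DuckDB struct literal:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT {'a': 1}", read="duckdb", write="snowflake")[0]
        #     "SELECT OBJECT_CONSTRUCT('a', 1)"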

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"

        def strtotime_sql(self, expression: exp.StrToTime):
            safe_prefix = "TRY_" if expression.args.get("safe") else ""
            return self.func(
                f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
            )

        def timestampsub_sql(self, expression: exp.TimestampSub):
            return self.sql(
                exp.TimestampAdd(
                    this=expression.this,
                    expression=expression.expression * -1,
                    unit=expression.unit,
                )
            )

        def jsonextract_sql(self, expression: exp.JSONExtract):
            this = expression.this

            # JSON strings are valid coming from other dialects such as BQ
            return self.func(
                "GET_PATH",
                exp.ParseJSON(this=this) if this.is_string else this,
                expression.expression,
            )

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if not isinstance(this, exp.TsOrDsToTimestamp):
                this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

            return self.func("TO_CHAR", this, self.format_time(expression))

        def datesub_sql(self, expression: exp.DateSub) -> str:
            value = expression.expression
            if value:
                value.replace(value * (-1))
            else:
                self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

            return date_delta_sql("DATEADD")(self, expression)

        def select_sql(self, expression: exp.Select) -> str:
            limit = expression.args.get("limit")
            offset = expression.args.get("offset")
            if offset and not limit:
                expression.limit(exp.Null(), copy=False)
            return super().select_sql(expression)
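
        # Example (illustrative sketch, not part of the original module):
        # select_sql above injects LIMIT NULL when OFFSET appears without a
        # LIMIT, since Snowflake requires one, so plausibly:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT a FROM t OFFSET 10", read="postgres", write="snowflake")[0]
        #     'SELECT a FROM t LIMIT NULL OFFSET 10'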
self._match_text_seq("APPLICATION"): 821 scope_kind = "APPLICATION" 822 if self._match_text_seq("PACKAGE"): 823 scope_kind += " PACKAGE" 824 scope = self._parse_table_parts() 825 elif self._match_set(self.DB_CREATABLES): 826 scope_kind = self._prev.text.upper() 827 if self._curr: 828 scope = self._parse_table_parts() 829 elif self._curr: 830 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 831 scope = self._parse_table_parts() 832 833 return self.expression( 834 exp.Show, 835 **{ 836 "terse": terse, 837 "this": this, 838 "history": history, 839 "like": like, 840 "scope": scope, 841 "scope_kind": scope_kind, 842 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 843 "limit": self._parse_limit(), 844 "from": self._parse_string() if self._match(TokenType.FROM) else None, 845 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 846 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 847 }, 848 ) 849 850 def _parse_put(self) -> exp.Put | exp.Command: 851 if self._curr.token_type != TokenType.STRING: 852 return self._parse_as_command(self._prev) 853 854 return self.expression( 855 exp.Put, 856 this=self._parse_string(), 857 target=self._parse_location_path(), 858 properties=self._parse_properties(), 859 ) 860 861 def _parse_location_property(self) -> exp.LocationProperty: 862 self._match(TokenType.EQ) 863 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 864 865 def _parse_file_location(self) -> t.Optional[exp.Expression]: 866 # Parse either a subquery or a staged file 867 return ( 868 self._parse_select(table=True, parse_subquery_alias=False) 869 if self._match(TokenType.L_PAREN, advance=False) 870 else self._parse_table_parts() 871 ) 872 873 def _parse_location_path(self) -> exp.Var: 874 start = self._curr 875 self._advance_any(ignore_reserved=True) 876 877 # We avoid consuming a comma token because external tables like @foo and @bar 878 # can be joined in a query with a comma separator, as well as closing paren 879 # in case of subqueries 880 while self._is_connected() and not self._match_set( 881 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 882 ): 883 self._advance_any(ignore_reserved=True) 884 885 return exp.var(self._find_sql(start, self._prev)) 886 887 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 888 this = super()._parse_lambda_arg() 889 890 if not this: 891 return this 892 893 typ = self._parse_types() 894 895 if typ: 896 return self.expression(exp.Cast, this=this, to=typ) 897 898 return this 899 900 def _parse_foreign_key(self) -> exp.ForeignKey: 901 # inlineFK, the REFERENCES columns are implied 902 if self._match(TokenType.REFERENCES, advance=False): 903 return self.expression(exp.ForeignKey) 904 905 # outoflineFK, explicitly names the columns 906 return super()._parse_foreign_key() 907 908 def _parse_file_format_property(self) -> exp.FileFormatProperty: 909 self._match(TokenType.EQ) 910 if self._match(TokenType.L_PAREN, advance=False): 911 expressions = self._parse_wrapped_options() 912 else: 913 expressions = [self._parse_format_name()] 914 915 return self.expression( 916 exp.FileFormatProperty, 917 expressions=expressions, 918 ) 919 920 def _parse_credentials_property(self) -> exp.CredentialsProperty: 921 return self.expression( 922 exp.CredentialsProperty, 923 expressions=self._parse_wrapped_options(), 924 ) 925 926 class Tokenizer(tokens.Tokenizer): 927 STRING_ESCAPES = ["\\", "'"] 928 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 929 RAW_STRINGS 
= ["$$"] 930 COMMENTS = ["--", "//", ("/*", "*/")] 931 NESTED_COMMENTS = False 932 933 KEYWORDS = { 934 **tokens.Tokenizer.KEYWORDS, 935 "FILE://": TokenType.URI_START, 936 "BYTEINT": TokenType.INT, 937 "CHAR VARYING": TokenType.VARCHAR, 938 "CHARACTER VARYING": TokenType.VARCHAR, 939 "EXCLUDE": TokenType.EXCEPT, 940 "FILE FORMAT": TokenType.FILE_FORMAT, 941 "ILIKE ANY": TokenType.ILIKE_ANY, 942 "LIKE ANY": TokenType.LIKE_ANY, 943 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 944 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 945 "MINUS": TokenType.EXCEPT, 946 "NCHAR VARYING": TokenType.VARCHAR, 947 "PUT": TokenType.PUT, 948 "REMOVE": TokenType.COMMAND, 949 "RM": TokenType.COMMAND, 950 "SAMPLE": TokenType.TABLE_SAMPLE, 951 "SQL_DOUBLE": TokenType.DOUBLE, 952 "SQL_VARCHAR": TokenType.VARCHAR, 953 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 954 "TAG": TokenType.TAG, 955 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 956 "TOP": TokenType.TOP, 957 "WAREHOUSE": TokenType.WAREHOUSE, 958 "STAGE": TokenType.STAGE, 959 "STREAMLIT": TokenType.STREAMLIT, 960 } 961 KEYWORDS.pop("/*+") 962 963 SINGLE_TOKENS = { 964 **tokens.Tokenizer.SINGLE_TOKENS, 965 "$": TokenType.PARAMETER, 966 } 967 968 VAR_SINGLE_TOKENS = {"$"} 969 970 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 971 972 class Generator(generator.Generator): 973 PARAMETER_TOKEN = "$" 974 MATCHED_BY_SOURCE = False 975 SINGLE_STRING_INTERVAL = True 976 JOIN_HINTS = False 977 TABLE_HINTS = False 978 QUERY_HINTS = False 979 AGGREGATE_FILTER_SUPPORTED = False 980 SUPPORTS_TABLE_COPY = False 981 COLLATE_IS_FUNC = True 982 LIMIT_ONLY_LITERALS = True 983 JSON_KEY_VALUE_PAIR_SEP = "," 984 INSERT_OVERWRITE = " OVERWRITE INTO" 985 STRUCT_DELIMITER = ("(", ")") 986 COPY_PARAMS_ARE_WRAPPED = False 987 COPY_PARAMS_EQ_REQUIRED = True 988 STAR_EXCEPT = "EXCLUDE" 989 SUPPORTS_EXPLODING_PROJECTIONS = False 990 ARRAY_CONCAT_IS_VAR_LEN = False 991 SUPPORTS_CONVERT_TIMEZONE = True 992 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 993 SUPPORTS_MEDIAN = True 994 ARRAY_SIZE_NAME = "ARRAY_SIZE" 995 996 TRANSFORMS = { 997 **generator.Generator.TRANSFORMS, 998 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 999 exp.ArgMax: rename_func("MAX_BY"), 1000 exp.ArgMin: rename_func("MIN_BY"), 1001 exp.Array: inline_array_sql, 1002 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1003 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1004 exp.AtTimeZone: lambda self, e: self.func( 1005 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1006 ), 1007 exp.BitwiseOr: rename_func("BITOR"), 1008 exp.BitwiseXor: rename_func("BITXOR"), 1009 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1010 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1011 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1012 exp.DateAdd: date_delta_sql("DATEADD"), 1013 exp.DateDiff: date_delta_sql("DATEDIFF"), 1014 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1015 exp.DatetimeDiff: timestampdiff_sql, 1016 exp.DateStrToDate: datestrtodate_sql, 1017 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1018 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1019 exp.DayOfYear: rename_func("DAYOFYEAR"), 1020 exp.Explode: rename_func("FLATTEN"), 1021 exp.Extract: rename_func("DATE_PART"), 1022 exp.FileFormatProperty: lambda self, 1023 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1024 exp.FromTimeZone: lambda self, e: self.func( 1025 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1026 ), 1027 
exp.GenerateSeries: lambda self, e: self.func( 1028 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1029 ), 1030 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1031 exp.If: if_sql(name="IFF", false_value="NULL"), 1032 exp.JSONExtractArray: _json_extract_value_array_sql, 1033 exp.JSONExtractScalar: lambda self, e: self.func( 1034 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1035 ), 1036 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1037 exp.JSONPathRoot: lambda *_: "", 1038 exp.JSONValueArray: _json_extract_value_array_sql, 1039 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1040 rename_func("EDITDISTANCE") 1041 ), 1042 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1043 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1044 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1045 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1046 exp.MakeInterval: no_make_interval_sql, 1047 exp.Max: max_or_greatest, 1048 exp.Min: min_or_least, 1049 exp.ParseJSON: lambda self, e: self.func( 1050 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1051 ), 1052 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1053 exp.PercentileCont: transforms.preprocess( 1054 [transforms.add_within_group_for_percentiles] 1055 ), 1056 exp.PercentileDisc: transforms.preprocess( 1057 [transforms.add_within_group_for_percentiles] 1058 ), 1059 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1060 exp.RegexpExtract: _regexpextract_sql, 1061 exp.RegexpExtractAll: _regexpextract_sql, 1062 exp.RegexpILike: _regexpilike_sql, 1063 exp.Rand: rename_func("RANDOM"), 1064 exp.Select: transforms.preprocess( 1065 [ 1066 transforms.eliminate_distinct_on, 1067 transforms.explode_projection_to_unnest(), 1068 transforms.eliminate_semi_and_anti_joins, 1069 _transform_generate_date_array, 1070 ] 1071 ), 1072 exp.SHA: rename_func("SHA1"), 1073 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1074 exp.StartsWith: rename_func("STARTSWITH"), 1075 exp.StrPosition: lambda self, e: strposition_sql( 1076 self, e, func_name="CHARINDEX", supports_position=True 1077 ), 1078 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1079 exp.Stuff: rename_func("INSERT"), 1080 exp.TimeAdd: date_delta_sql("TIMEADD"), 1081 exp.Timestamp: no_timestamp_sql, 1082 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1083 exp.TimestampDiff: lambda self, e: self.func( 1084 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1085 ), 1086 exp.TimestampTrunc: timestamptrunc_sql(), 1087 exp.TimeStrToTime: timestrtotime_sql, 1088 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1089 exp.ToArray: rename_func("TO_ARRAY"), 1090 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1091 exp.ToDouble: rename_func("TO_DOUBLE"), 1092 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1093 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1094 exp.TsOrDsToDate: lambda self, e: self.func( 1095 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1096 ), 1097 exp.TsOrDsToTime: lambda self, e: self.func( 1098 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1099 ), 1100 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1101 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1102 exp.Uuid: rename_func("UUID_STRING"), 1103 exp.VarMap: lambda self, e: var_map_sql(self, e, 
"OBJECT_CONSTRUCT"), 1104 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1105 exp.Xor: rename_func("BOOLXOR"), 1106 } 1107 1108 SUPPORTED_JSON_PATH_PARTS = { 1109 exp.JSONPathKey, 1110 exp.JSONPathRoot, 1111 exp.JSONPathSubscript, 1112 } 1113 1114 TYPE_MAPPING = { 1115 **generator.Generator.TYPE_MAPPING, 1116 exp.DataType.Type.NESTED: "OBJECT", 1117 exp.DataType.Type.STRUCT: "OBJECT", 1118 } 1119 1120 TOKEN_MAPPING = { 1121 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1122 } 1123 1124 PROPERTIES_LOCATION = { 1125 **generator.Generator.PROPERTIES_LOCATION, 1126 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1127 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1128 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1129 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1130 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1131 } 1132 1133 UNSUPPORTED_VALUES_EXPRESSIONS = { 1134 exp.Map, 1135 exp.StarMap, 1136 exp.Struct, 1137 exp.VarMap, 1138 } 1139 1140 def with_properties(self, properties: exp.Properties) -> str: 1141 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1142 1143 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1144 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1145 values_as_table = False 1146 1147 return super().values_sql(expression, values_as_table=values_as_table) 1148 1149 def datatype_sql(self, expression: exp.DataType) -> str: 1150 expressions = expression.expressions 1151 if ( 1152 expressions 1153 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1154 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1155 ): 1156 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1157 return "OBJECT" 1158 1159 return super().datatype_sql(expression) 1160 1161 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1162 return self.func( 1163 "TO_NUMBER", 1164 expression.this, 1165 expression.args.get("format"), 1166 expression.args.get("precision"), 1167 expression.args.get("scale"), 1168 ) 1169 1170 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1171 milli = expression.args.get("milli") 1172 if milli is not None: 1173 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1174 expression.set("nano", milli_to_nano) 1175 1176 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1177 1178 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1179 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1180 return self.func("TO_GEOGRAPHY", expression.this) 1181 if expression.is_type(exp.DataType.Type.GEOMETRY): 1182 return self.func("TO_GEOMETRY", expression.this) 1183 1184 return super().cast_sql(expression, safe_prefix=safe_prefix) 1185 1186 def trycast_sql(self, expression: exp.TryCast) -> str: 1187 value = expression.this 1188 1189 if value.type is None: 1190 from sqlglot.optimizer.annotate_types import annotate_types 1191 1192 value = annotate_types(value, dialect=self.dialect) 1193 1194 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1195 return super().trycast_sql(expression) 1196 1197 # TRY_CAST only works for string values in Snowflake 1198 return self.cast_sql(expression) 1199 1200 def log_sql(self, expression: exp.Log) -> str: 1201 if not expression.expression: 1202 return self.func("LN", expression.this) 1203 1204 return super().log_sql(expression) 1205 1206 def unnest_sql(self, expression: exp.Unnest) -> str: 1207 
unnest_alias = expression.args.get("alias") 1208 offset = expression.args.get("offset") 1209 1210 columns = [ 1211 exp.to_identifier("seq"), 1212 exp.to_identifier("key"), 1213 exp.to_identifier("path"), 1214 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1215 seq_get(unnest_alias.columns if unnest_alias else [], 0) 1216 or exp.to_identifier("value"), 1217 exp.to_identifier("this"), 1218 ] 1219 1220 if unnest_alias: 1221 unnest_alias.set("columns", columns) 1222 else: 1223 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1224 1225 table_input = self.sql(expression.expressions[0]) 1226 if not table_input.startswith("INPUT =>"): 1227 table_input = f"INPUT => {table_input}" 1228 1229 explode = f"TABLE(FLATTEN({table_input}))" 1230 alias = self.sql(unnest_alias) 1231 alias = f" AS {alias}" if alias else "" 1232 return f"{explode}{alias}" 1233 1234 def show_sql(self, expression: exp.Show) -> str: 1235 terse = "TERSE " if expression.args.get("terse") else "" 1236 history = " HISTORY" if expression.args.get("history") else "" 1237 like = self.sql(expression, "like") 1238 like = f" LIKE {like}" if like else "" 1239 1240 scope = self.sql(expression, "scope") 1241 scope = f" {scope}" if scope else "" 1242 1243 scope_kind = self.sql(expression, "scope_kind") 1244 if scope_kind: 1245 scope_kind = f" IN {scope_kind}" 1246 1247 starts_with = self.sql(expression, "starts_with") 1248 if starts_with: 1249 starts_with = f" STARTS WITH {starts_with}" 1250 1251 limit = self.sql(expression, "limit") 1252 1253 from_ = self.sql(expression, "from") 1254 if from_: 1255 from_ = f" FROM {from_}" 1256 1257 privileges = self.expressions(expression, key="privileges", flat=True) 1258 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1259 1260 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1261 1262 def describe_sql(self, expression: exp.Describe) -> str: 1263 # Default to table if kind is unknown 1264 kind_value = expression.args.get("kind") or "TABLE" 1265 kind = f" {kind_value}" if kind_value else "" 1266 this = f" {self.sql(expression, 'this')}" 1267 expressions = self.expressions(expression, flat=True) 1268 expressions = f" {expressions}" if expressions else "" 1269 return f"DESCRIBE{kind}{this}{expressions}" 1270 1271 def generatedasidentitycolumnconstraint_sql( 1272 self, expression: exp.GeneratedAsIdentityColumnConstraint 1273 ) -> str: 1274 start = expression.args.get("start") 1275 start = f" START {start}" if start else "" 1276 increment = expression.args.get("increment") 1277 increment = f" INCREMENT {increment}" if increment else "" 1278 return f"AUTOINCREMENT{start}{increment}" 1279 1280 def cluster_sql(self, expression: exp.Cluster) -> str: 1281 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1282 1283 def struct_sql(self, expression: exp.Struct) -> str: 1284 keys = [] 1285 values = [] 1286 1287 for i, e in enumerate(expression.expressions): 1288 if isinstance(e, exp.PropertyEQ): 1289 keys.append( 1290 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1291 ) 1292 values.append(e.expression) 1293 else: 1294 keys.append(exp.Literal.string(f"_{i}")) 1295 values.append(e) 1296 1297 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1298 1299 @unsupported_args("weight", "accuracy") 1300 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1301 return self.func("APPROX_PERCENTILE", expression.this, 
expression.args.get("quantile")) 1302 1303 def alterset_sql(self, expression: exp.AlterSet) -> str: 1304 exprs = self.expressions(expression, flat=True) 1305 exprs = f" {exprs}" if exprs else "" 1306 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1307 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1308 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1309 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1310 tag = self.expressions(expression, key="tag", flat=True) 1311 tag = f" TAG {tag}" if tag else "" 1312 1313 return f"SET{exprs}{file_format}{copy_options}{tag}" 1314 1315 def strtotime_sql(self, expression: exp.StrToTime): 1316 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1317 return self.func( 1318 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1319 ) 1320 1321 def timestampsub_sql(self, expression: exp.TimestampSub): 1322 return self.sql( 1323 exp.TimestampAdd( 1324 this=expression.this, 1325 expression=expression.expression * -1, 1326 unit=expression.unit, 1327 ) 1328 ) 1329 1330 def jsonextract_sql(self, expression: exp.JSONExtract): 1331 this = expression.this 1332 1333 # JSON strings are valid coming from other dialects such as BQ 1334 return self.func( 1335 "GET_PATH", 1336 exp.ParseJSON(this=this) if this.is_string else this, 1337 expression.expression, 1338 ) 1339 1340 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1341 this = expression.this 1342 if not isinstance(this, exp.TsOrDsToTimestamp): 1343 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1344 1345 return self.func("TO_CHAR", this, self.format_time(expression)) 1346 1347 def datesub_sql(self, expression: exp.DateSub) -> str: 1348 value = expression.expression 1349 if value: 1350 value.replace(value * (-1)) 1351 else: 1352 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1353 1354 return date_delta_sql("DATEADD")(self, expression) 1355 1356 def select_sql(self, expression: exp.Select) -> str: 1357 limit = expression.args.get("limit") 1358 offset = expression.args.get("offset") 1359 if offset and not limit: 1360 expression.limit(exp.Null(), copy=False) 1361 return super().select_sql(expression)
Specifies the strategy according to which identifiers should be normalized.
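In practice this means unquoted identifiers fold to upper case (Snowflake uses uppercase normalization), while quoted identifiers are preserved. A minimal sketch using sqlglot's normalize_identifiers helper:

    import sqlglot
    from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

    # Unquoted identifiers fold to upper case; quoted ones keep their spelling
    ast = sqlglot.parse_one('SELECT a, "b" FROM t', read="snowflake")
    print(normalize_identifiers(ast, dialect="snowflake").sql(dialect="snowflake"))
    # SELECT A, "b" FROM T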
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,
WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
  SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
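A sketch of triggering this rewrite through sqlglot's qualification step (assuming the flag is consumed there; note that qualify also quotes identifiers by default, so the output differs from the above in quoting only):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
    # the CTE alias column `c` overrides the projection alias inside the CTE body
    print(qualify(sqlglot.parse_one(sql, read="snowflake"), dialect="snowflake").sql(dialect="snowflake"))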
Associates this dialect's time formats with their equivalent Python strftime formats.
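For example (key spellings follow sqlglot's Snowflake mapping):

    from sqlglot.dialects.snowflake import Snowflake

    # Snowflake format elements map onto Python strftime directives
    print(Snowflake.TIME_MAPPING["YYYY"])  # %Y
    print(Snowflake.TIME_MAPPING["MM"])    # %m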
376 def quote_identifier(self, expression: E, identify: bool = True) -> E: 377 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 378 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 379 if ( 380 isinstance(expression, exp.Identifier) 381 and isinstance(expression.parent, exp.Table) 382 and expression.name.lower() == "dual" 383 ): 384 return expression # type: ignore 385 386 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
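A minimal usage sketch (the DUAL carve-out only applies when the Identifier sits directly under a Table node):

    from sqlglot import exp
    from sqlglot.dialects.snowflake import Snowflake

    dialect = Snowflake()
    dual = exp.to_table("dual").this    # Identifier nested inside a Table
    col = exp.to_identifier("my_col")   # free-standing Identifier

    print(dialect.quote_identifier(dual).sql(dialect="snowflake"))  # dual (left unquoted)
    print(dialect.quote_identifier(col).sql(dialect="snowflake"))   # "my_col"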
Mapping of an escaped sequence (e.g. the two characters \n) to its unescaped version (the literal newline character).
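A small sketch (assumption: Snowflake inherits sqlglot's default mapping because backslash is one of its string escapes):

    from sqlglot.dialects.snowflake import Snowflake

    # the two-character sequence backslash + n unescapes to a real newline
    print(repr(Snowflake.UNESCAPED_SEQUENCES["\\n"]))  # '\n'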
388 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 389 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 390 SINGLE_TOKENS.pop("$")
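One visible consequence is that Snowflake JSON paths carry no "$" root, since exp.JSONPathRoot renders as an empty string in this dialect. A sketch (the exact output is an assumption about current sqlglot behavior):

    import sqlglot

    # Snowflake's variant access parses into a JSON extract and renders as GET_PATH
    print(sqlglot.transpile("SELECT col:a.b FROM t", read="snowflake", write="snowflake")[0])
    # roughly: SELECT GET_PATH(col, 'a.b') FROM t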
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
392 class Parser(parser.Parser): ...
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
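These options flow through sqlglot's top-level API, which forwards parser keyword arguments. A sketch:

    import sqlglot
    from sqlglot.errors import ErrorLevel

    # warn on parse problems instead of raising immediately
    ast = sqlglot.parse_one("SELECT IFF(x > 0, 1, 2)", read="snowflake", error_level=ErrorLevel.WARN)
    print(ast.sql(dialect="snowflake"))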
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
926 class Tokenizer(tokens.Tokenizer): ...
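A couple of the tokenizer rules above in action (expected outputs are assumptions about current sqlglot behavior):

    import sqlglot

    # '//' opens a line comment; MINUS is tokenized as EXCEPT
    print(sqlglot.transpile("SELECT 1 // one", read="snowflake", write="snowflake")[0])
    # SELECT 1 /* one */
    print(sqlglot.transpile("SELECT a FROM t MINUS SELECT a FROM u", read="snowflake", write="duckdb")[0])
    # SELECT a FROM t EXCEPT SELECT a FROM u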
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
972 class Generator(generator.Generator): ...
1242 1243 scope_kind = self.sql(expression, "scope_kind") 1244 if scope_kind: 1245 scope_kind = f" IN {scope_kind}" 1246 1247 starts_with = self.sql(expression, "starts_with") 1248 if starts_with: 1249 starts_with = f" STARTS WITH {starts_with}" 1250 1251 limit = self.sql(expression, "limit") 1252 1253 from_ = self.sql(expression, "from") 1254 if from_: 1255 from_ = f" FROM {from_}" 1256 1257 privileges = self.expressions(expression, key="privileges", flat=True) 1258 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1259 1260 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1261 1262 def describe_sql(self, expression: exp.Describe) -> str: 1263 # Default to table if kind is unknown 1264 kind_value = expression.args.get("kind") or "TABLE" 1265 kind = f" {kind_value}" if kind_value else "" 1266 this = f" {self.sql(expression, 'this')}" 1267 expressions = self.expressions(expression, flat=True) 1268 expressions = f" {expressions}" if expressions else "" 1269 return f"DESCRIBE{kind}{this}{expressions}" 1270 1271 def generatedasidentitycolumnconstraint_sql( 1272 self, expression: exp.GeneratedAsIdentityColumnConstraint 1273 ) -> str: 1274 start = expression.args.get("start") 1275 start = f" START {start}" if start else "" 1276 increment = expression.args.get("increment") 1277 increment = f" INCREMENT {increment}" if increment else "" 1278 return f"AUTOINCREMENT{start}{increment}" 1279 1280 def cluster_sql(self, expression: exp.Cluster) -> str: 1281 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1282 1283 def struct_sql(self, expression: exp.Struct) -> str: 1284 keys = [] 1285 values = [] 1286 1287 for i, e in enumerate(expression.expressions): 1288 if isinstance(e, exp.PropertyEQ): 1289 keys.append( 1290 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1291 ) 1292 values.append(e.expression) 1293 else: 1294 keys.append(exp.Literal.string(f"_{i}")) 1295 values.append(e) 1296 1297 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1298 1299 @unsupported_args("weight", "accuracy") 1300 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1301 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1302 1303 def alterset_sql(self, expression: exp.AlterSet) -> str: 1304 exprs = self.expressions(expression, flat=True) 1305 exprs = f" {exprs}" if exprs else "" 1306 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1307 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1308 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1309 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1310 tag = self.expressions(expression, key="tag", flat=True) 1311 tag = f" TAG {tag}" if tag else "" 1312 1313 return f"SET{exprs}{file_format}{copy_options}{tag}" 1314 1315 def strtotime_sql(self, expression: exp.StrToTime): 1316 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1317 return self.func( 1318 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1319 ) 1320 1321 def timestampsub_sql(self, expression: exp.TimestampSub): 1322 return self.sql( 1323 exp.TimestampAdd( 1324 this=expression.this, 1325 expression=expression.expression * -1, 1326 unit=expression.unit, 1327 ) 1328 ) 1329 1330 def jsonextract_sql(self, expression: exp.JSONExtract): 1331 this = expression.this 1332 
1333 # JSON strings are valid coming from other dialects such as BQ 1334 return self.func( 1335 "GET_PATH", 1336 exp.ParseJSON(this=this) if this.is_string else this, 1337 expression.expression, 1338 ) 1339 1340 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1341 this = expression.this 1342 if not isinstance(this, exp.TsOrDsToTimestamp): 1343 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1344 1345 return self.func("TO_CHAR", this, self.format_time(expression)) 1346 1347 def datesub_sql(self, expression: exp.DateSub) -> str: 1348 value = expression.expression 1349 if value: 1350 value.replace(value * (-1)) 1351 else: 1352 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1353 1354 return date_delta_sql("DATEADD")(self, expression) 1355 1356 def select_sql(self, expression: exp.Select) -> str: 1357 limit = expression.args.get("limit") 1358 offset = expression.args.get("offset") 1359 if offset and not limit: 1360 expression.limit(exp.Null(), copy=False) 1361 return super().select_sql(expression)
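The class attributes and the TRANSFORMS table above drive most of the Snowflake-specific rendering. As a minimal sketch of how they surface through sqlglot's public transpile API (exact output may vary slightly between releases):

import sqlglot

# exp.If maps through if_sql(name="IFF", ...), so IF(...) becomes IFF(...).
print(sqlglot.transpile("SELECT IF(x > 0, 'pos', 'neg') FROM t", read="duckdb", write="snowflake")[0])
# SELECT IFF(x > 0, 'pos', 'neg') FROM t

# STAR_EXCEPT = "EXCLUDE" rewrites BigQuery's SELECT * EXCEPT (...) clause.
print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="snowflake")[0])
# SELECT * EXCLUDE (a) FROM t

# exp.Levenshtein is renamed to EDITDISTANCE; the extra cost arguments are
# declared unsupported rather than silently dropped.
print(sqlglot.transpile("SELECT LEVENSHTEIN(a, b) FROM t", write="snowflake")[0])
# SELECT EDITDISTANCE(a, b) FROM t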
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
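A short sketch of passing these arguments through Expression.sql(), which forwards keyword options to the Generator; this is equivalent to constructing the Snowflake Generator with the same settings:

from sqlglot import parse_one

sql = parse_one("select a, b from my_table where a > 1").sql(
    dialect="snowflake",
    pretty=True,      # format the output with indentation and line breaks
    identify=True,    # quote every identifier
)
print(sql)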
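A hedged sketch of two of the method-level behaviors defined above; the outputs are indicative and may differ slightly across sqlglot versions:

import sqlglot

# trycast_sql: Snowflake's TRY_CAST only accepts string inputs, so for a
# non-string operand (here, an integer literal) a plain CAST should be emitted.
print(sqlglot.transpile("SELECT TRY_CAST(123 AS DOUBLE)", read="duckdb", write="snowflake")[0])

# select_sql: Snowflake requires a LIMIT when OFFSET is present, so a LIMIT NULL
# is injected; the output should contain both LIMIT NULL and the original OFFSET.
print(sqlglot.transpile("SELECT a FROM t OFFSET 10", read="postgres", write="snowflake")[0])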
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- put_sql
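The inherited generate() entry point can also be driven directly. A minimal sketch, assuming the Dialect.generator() helper available in recent sqlglot releases:

from sqlglot import parse_one
from sqlglot.dialects.snowflake import Snowflake

# Going through the Dialect instance keeps dialect settings in sync with
# the Generator subclass documented on this page.
generator = Snowflake().generator(pretty=True)
print(generator.generate(parse_one("select current_timestamp")))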