sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    build_timetostr_or_tochar,
    binary_from_function,
    build_default_decimal_type,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_timestamp_sql,
    strposition_sql,
    timestampdiff_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind))
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

        if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value:
            klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
            formatted_exp = build_formatted_time(klass, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder
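
# Illustrative sketch (editor's addition, not part of the module): the builder
# above is what makes the TO_TIMESTAMP family transpilable. Assuming sqlglot is
# installed, parsing shows the different AST shapes it produces:
#
#     from sqlglot import parse_one
#
#     # String literal with no format -> plain cast
#     parse_one("SELECT TO_TIMESTAMP('2020-01-01')", read="snowflake")
#     # roughly: SELECT CAST('2020-01-01' AS TIMESTAMP)
#
#     # Integer argument -> unix-epoch conversion (exp.UnixToTime)
#     parse_one("SELECT TO_TIMESTAMP(1659981729)", read="snowflake")
#
#     # Column plus format string -> exp.StrToTime, with the Snowflake format
#     # mapped onto the internal strftime-style format
#     parse_one("SELECT TO_TIMESTAMP(col, 'yyyy-mm-dd')", read="snowflake")
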
def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)

        return binary_from_function(expr_type)(args)

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)
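
# Sketch (editor's addition): DIV0 has no direct equivalent in most dialects,
# so it is parsed into the IF expression built above. Assuming sqlglot is
# installed, a round trip makes the rewrite visible:
#
#     from sqlglot import parse_one
#     parse_one("SELECT DIV0(a, b)", read="snowflake").sql(dialect="snowflake")
#     # roughly: "SELECT IFF(b = 0 AND NOT a IS NULL, 0, a / b)"
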
# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. The same goes for ANY ORDER BY <column>.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot):
        if expression.unpivot:
            expression = transforms.unqualify_columns(expression)
        else:
            for field in expression.fields:
                field_expr = seq_get(field.expressions if field else [], 0)

                if isinstance(field_expr, exp.PivotAny):
                    unqualified_field_expr = transforms.unqualify_columns(field_expr)
                    t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    ).as_(sequence_value_name)

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])
    unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))


def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into ARRAY_LENGTH),
            # the transformed Snowflake query is the following (it'll be unnested properly on the
            # next iteration due to the copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) ->
            # SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression
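
# Sketch (editor's addition, assuming sqlglot is installed): the two helpers
# above emulate GENERATE_DATE_ARRAY, which Snowflake lacks, by projecting
# DATEADD(<unit>, value, <start>) over ARRAY_GENERATE_RANGE(0, DATEDIFF(...) + 1):
#
#     from sqlglot import transpile
#     transpile(
#         "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-01-05', INTERVAL 1 DAY))",
#         read="bigquery",
#         write="snowflake",
#     )
#     # roughly: a TABLE(FLATTEN(...)) over ARRAY_GENERATE_RANGE, wrapped in a
#     # subquery that projects DATEADD(DAY, value, CAST('2020-01-01' AS DATE))
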
def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)
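
# Sketch (editor's addition): _regexpextract_sql above fills in Snowflake's
# optional REGEXP_SUBSTR arguments right-to-left, so an explicit `group` forces
# defaults for parameters/occurrence/position. Assuming sqlglot is installed:
#
#     from sqlglot import transpile
#     transpile("SELECT REGEXP_EXTRACT(col, 'a(b)', 1)", read="duckdb", write="snowflake")
#     # roughly: ["SELECT REGEXP_SUBSTR(col, 'a(b)', 1, 1, 'c', 1)"]
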
class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)
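
    # Sketch (editor's addition): the carve-out above matters for tools that ask
    # the dialect to quote every identifier, e.g. qualification with identify=True.
    # Assuming sqlglot is installed:
    #
    #     from sqlglot import parse_one
    #     from sqlglot.optimizer.qualify import qualify
    #
    #     expr = parse_one("SELECT 1 FROM dual", read="snowflake")
    #     qualify(expr, dialect="snowflake").sql(dialect="snowflake")
    #     # `dual` stays unquoted; quoting it would make Snowflake look for a
    #     # real table named "dual"
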
    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_CHAR": build_timetostr_or_tochar,
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }
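
        # Sketch (editor's addition): the entries above normalize argument order
        # as well as names. Snowflake's ARRAY_CONTAINS takes (value, array) while
        # sqlglot's exp.ArrayContains stores (array, value), so parsing swaps the
        # arguments and the generator swaps them back:
        #
        #     from sqlglot import parse_one
        #     e = parse_one("SELECT ARRAY_CONTAINS(1, arr)", read="snowflake")
        #     e.sql(dialect="snowflake")  # round-trips unchanged
        #     e.sql(dialect="spark")      # roughly: SELECT ARRAY_CONTAINS(arr, 1)
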
        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
            "LISTAGG": lambda self: self._parse_string_agg(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.PUT: lambda self: self._parse_put(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "CREDENTIALS": lambda self: self._parse_credentials_property(),
            "FILE_FORMAT": lambda self: self._parse_file_format_property(),
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
            "USING": lambda self: self._match_text_seq("TEMPLATE")
            and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "DATABASES": _show_parser("DATABASES"),
            "TERSE DATABASES": _show_parser("DATABASES"),
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "STAGES": _show_parser("STAGES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
            "FILE FORMATS": _show_parser("FILE FORMATS"),
            "FUNCTIONS": _show_parser("FUNCTIONS"),
            "PROCEDURES": _show_parser("PROCEDURES"),
            "WAREHOUSES": _show_parser("WAREHOUSES"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        def _parse_use(self) -> exp.Use:
            if self._match_text_seq("SECONDARY", "ROLES"):
                this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
                roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
                return self.expression(
                    exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
                )

            return super()._parse_use()

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `value <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`,
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)
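
        # Sketch (editor's addition): the effect of _negate_range above,
        # assuming sqlglot is installed:
        #
        #     from sqlglot import parse_one
        #     parse_one(
        #         "SELECT a NOT IN (SELECT b FROM t)", read="snowflake"
        #     ).sql(dialect="snowflake")
        #     # roughly: "SELECT a <> ALL (SELECT b FROM t)"
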
        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)
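
        # Sketch (editor's addition): EPOCH* parts are rewritten into unix-time
        # arithmetic rather than kept as EXTRACT arguments. Assuming sqlglot is
        # installed:
        #
        #     from sqlglot import parse_one
        #     parse_one(
        #         "SELECT DATE_PART(epoch_millisecond, col)", read="snowflake"
        #     ).sql(dialect="snowflake")
        #     # roughly: "SELECT EXTRACT(epoch_second FROM CAST(col AS TIMESTAMP)) * 1000"
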
        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table
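
        # Sketch (editor's addition): _parse_table_parts above is what lets
        # staged-file references appear in FROM clauses. Assuming sqlglot is
        # installed:
        #
        #     from sqlglot import parse_one
        #     parse_one(
        #         "SELECT * FROM @mystage/some/path (FILE_FORMAT => 'csv', PATTERN => '.*parquet')",
        #         read="snowflake",
        #     )
        #     # the stage path becomes an exp.Table carrying `format` and `pattern` args
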
        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # This will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS,
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                    "privileges": self._match_text_seq("WITH", "PRIVILEGES")
                    and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
                },
            )

        def _parse_put(self) -> exp.Put | exp.Command:
            if self._curr.token_type != TokenType.STRING:
                return self._parse_as_command(self._prev)

            return self.expression(
                exp.Put,
                this=self._parse_string(),
                target=self._parse_location_path(),
                properties=self._parse_properties(),
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            start = self._curr
            self._advance_any(ignore_reserved=True)

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator. The same goes for a closing
            # paren, in case the path appears inside a subquery.
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                self._advance_any(ignore_reserved=True)

            return exp.var(self._find_sql(start, self._prev))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # For an inline FK, the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # An out-of-line FK names the columns explicitly
            return super()._parse_foreign_key()

        def _parse_file_format_property(self) -> exp.FileFormatProperty:
            self._match(TokenType.EQ)
            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_options()
            else:
                expressions = [self._parse_format_name()]

            return self.expression(
                exp.FileFormatProperty,
                expressions=expressions,
            )

        def _parse_credentials_property(self) -> exp.CredentialsProperty:
            return self.expression(
                exp.CredentialsProperty,
                expressions=self._parse_wrapped_options(),
            )
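
    # Sketch (editor's addition): SHOW statements parse into structured exp.Show
    # nodes rather than opaque commands, so they can be inspected and re-rendered.
    # Assuming sqlglot is installed:
    #
    #     from sqlglot import parse_one
    #     show = parse_one("SHOW TERSE TABLES LIKE '%foo%' IN SCHEMA db.s", read="snowflake")
    #     show.args.get("scope_kind")    # roughly: "SCHEMA"
    #     show.sql(dialect="snowflake")  # round-trips the statement
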
    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "FILE://": TokenType.URI_START,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STAGE": TokenType.STAGE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
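
    # Sketch (editor's addition): two keyword mappings above drive common
    # transpilations: EXCLUDE tokenizes like EXCEPT (for SELECT * EXCLUDE), and
    # MINUS tokenizes as the EXCEPT set operation. Assuming sqlglot is installed:
    #
    #     from sqlglot import transpile
    #     transpile("SELECT * EXCLUDE (a) FROM t", read="snowflake", write="bigquery")
    #     # roughly: ['SELECT * EXCEPT (a) FROM t']
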
    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FileFormatProperty: lambda self, e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
            exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }
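
        # Sketch (editor's addition): TRANSFORMS above mostly renames functions
        # into their Snowflake spellings when generating. Assuming sqlglot is
        # installed:
        #
        #     from sqlglot import transpile
        #     transpile("SELECT IF(x > 0, 1, 2)", read="duckdb", write="snowflake")
        #     # roughly: ['SELECT IFF(x > 0, 1, 2)']
        #     transpile("SELECT ARG_MAX(a, b) FROM t", read="duckdb", write="snowflake")
        #     # roughly: ['SELECT MAX_BY(a, b) FROM t']
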
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)
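
        # Sketch (editor's addition): since Snowflake's TRY_CAST only accepts
        # string inputs, trycast_sql above degrades to a plain CAST when the
        # operand is known to be non-string. Assuming sqlglot is installed:
        #
        #     from sqlglot import parse_one
        #     parse_one("SELECT TRY_CAST(10 AS TEXT)").sql(dialect="snowflake")
        #     # roughly: "SELECT CAST(10 AS TEXT)" (int operand, TRY_ dropped)
        #     parse_one("SELECT TRY_CAST(col AS INT)").sql(dialect="snowflake")
        #     # stays TRY_CAST, since col's type is unknown
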
        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            explode = f"TABLE(FLATTEN({table_input}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            privileges = self.expressions(expression, key="privileges", flat=True)
            privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
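
        # Sketch (editor's addition): struct_sql above renders structs as
        # OBJECT_CONSTRUCT key/value pairs, falling back to positional "_N" keys
        # for unnamed fields. Assuming sqlglot is installed:
        #
        #     from sqlglot import transpile
        #     transpile("SELECT STRUCT(1 AS a, 2 AS b)", read="bigquery", write="snowflake")
        #     # roughly: ["SELECT OBJECT_CONSTRUCT('a', 1, 'b', 2)"]
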
        @unsupported_args("weight", "accuracy")
        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"

        def strtotime_sql(self, expression: exp.StrToTime):
            safe_prefix = "TRY_" if expression.args.get("safe") else ""
            return self.func(
                f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
            )

        def timestampsub_sql(self, expression: exp.TimestampSub):
            return self.sql(
                exp.TimestampAdd(
                    this=expression.this,
                    expression=expression.expression * -1,
                    unit=expression.unit,
                )
            )

        def jsonextract_sql(self, expression: exp.JSONExtract):
            this = expression.this

            # JSON strings are valid coming from other dialects such as BQ
            return self.func(
                "GET_PATH",
                exp.ParseJSON(this=this) if this.is_string else this,
                expression.expression,
            )

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if not isinstance(this, exp.TsOrDsToTimestamp):
                this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

            return self.func("TO_CHAR", this, self.format_time(expression))

        def datesub_sql(self, expression: exp.DateSub) -> str:
            value = expression.expression
            if value:
                value.replace(value * (-1))
            else:
                self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

            return date_delta_sql("DATEADD")(self, expression)

        def select_sql(self, expression: exp.Select) -> str:
            limit = expression.args.get("limit")
            offset = expression.args.get("offset")
            if offset and not limit:
                expression.limit(exp.Null(), copy=False)
            return super().select_sql(expression)
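
# Sketch (editor's addition): select_sql above papers over Snowflake's
# requirement that OFFSET be accompanied by LIMIT. Assuming sqlglot is
# installed:
#
#     from sqlglot import transpile
#     transpile("SELECT x FROM t OFFSET 10", read="duckdb", write="snowflake")
#     # roughly: ['SELECT x FROM t LIMIT NULL OFFSET 10']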
"APPLICATION" 820 if self._match_text_seq("PACKAGE"): 821 scope_kind += " PACKAGE" 822 scope = self._parse_table_parts() 823 elif self._match_set(self.DB_CREATABLES): 824 scope_kind = self._prev.text.upper() 825 if self._curr: 826 scope = self._parse_table_parts() 827 elif self._curr: 828 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 829 scope = self._parse_table_parts() 830 831 return self.expression( 832 exp.Show, 833 **{ 834 "terse": terse, 835 "this": this, 836 "history": history, 837 "like": like, 838 "scope": scope, 839 "scope_kind": scope_kind, 840 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 841 "limit": self._parse_limit(), 842 "from": self._parse_string() if self._match(TokenType.FROM) else None, 843 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 844 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 845 }, 846 ) 847 848 def _parse_put(self) -> exp.Put | exp.Command: 849 if self._curr.token_type != TokenType.STRING: 850 return self._parse_as_command(self._prev) 851 852 return self.expression( 853 exp.Put, 854 this=self._parse_string(), 855 target=self._parse_location_path(), 856 properties=self._parse_properties(), 857 ) 858 859 def _parse_location_property(self) -> exp.LocationProperty: 860 self._match(TokenType.EQ) 861 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 862 863 def _parse_file_location(self) -> t.Optional[exp.Expression]: 864 # Parse either a subquery or a staged file 865 return ( 866 self._parse_select(table=True, parse_subquery_alias=False) 867 if self._match(TokenType.L_PAREN, advance=False) 868 else self._parse_table_parts() 869 ) 870 871 def _parse_location_path(self) -> exp.Var: 872 start = self._curr 873 self._advance_any(ignore_reserved=True) 874 875 # We avoid consuming a comma token because external tables like @foo and @bar 876 # can be joined in a query with a comma separator, as well as closing paren 877 # in case of subqueries 878 while self._is_connected() and not self._match_set( 879 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 880 ): 881 self._advance_any(ignore_reserved=True) 882 883 return exp.var(self._find_sql(start, self._prev)) 884 885 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 886 this = super()._parse_lambda_arg() 887 888 if not this: 889 return this 890 891 typ = self._parse_types() 892 893 if typ: 894 return self.expression(exp.Cast, this=this, to=typ) 895 896 return this 897 898 def _parse_foreign_key(self) -> exp.ForeignKey: 899 # inlineFK, the REFERENCES columns are implied 900 if self._match(TokenType.REFERENCES, advance=False): 901 return self.expression(exp.ForeignKey) 902 903 # outoflineFK, explicitly names the columns 904 return super()._parse_foreign_key() 905 906 def _parse_file_format_property(self) -> exp.FileFormatProperty: 907 self._match(TokenType.EQ) 908 if self._match(TokenType.L_PAREN, advance=False): 909 expressions = self._parse_wrapped_options() 910 else: 911 expressions = [self._parse_format_name()] 912 913 return self.expression( 914 exp.FileFormatProperty, 915 expressions=expressions, 916 ) 917 918 def _parse_credentials_property(self) -> exp.CredentialsProperty: 919 return self.expression( 920 exp.CredentialsProperty, 921 expressions=self._parse_wrapped_options(), 922 ) 923 924 class Tokenizer(tokens.Tokenizer): 925 STRING_ESCAPES = ["\\", "'"] 926 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 927 RAW_STRINGS = ["$$"] 928 COMMENTS = ["--", "//", ("/*", "*/")] 929 
NESTED_COMMENTS = False 930 931 KEYWORDS = { 932 **tokens.Tokenizer.KEYWORDS, 933 "FILE://": TokenType.URI_START, 934 "BYTEINT": TokenType.INT, 935 "CHAR VARYING": TokenType.VARCHAR, 936 "CHARACTER VARYING": TokenType.VARCHAR, 937 "EXCLUDE": TokenType.EXCEPT, 938 "FILE FORMAT": TokenType.FILE_FORMAT, 939 "ILIKE ANY": TokenType.ILIKE_ANY, 940 "LIKE ANY": TokenType.LIKE_ANY, 941 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 942 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 943 "MINUS": TokenType.EXCEPT, 944 "NCHAR VARYING": TokenType.VARCHAR, 945 "PUT": TokenType.PUT, 946 "REMOVE": TokenType.COMMAND, 947 "RM": TokenType.COMMAND, 948 "SAMPLE": TokenType.TABLE_SAMPLE, 949 "SQL_DOUBLE": TokenType.DOUBLE, 950 "SQL_VARCHAR": TokenType.VARCHAR, 951 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 952 "TAG": TokenType.TAG, 953 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 954 "TOP": TokenType.TOP, 955 "WAREHOUSE": TokenType.WAREHOUSE, 956 "STAGE": TokenType.STAGE, 957 "STREAMLIT": TokenType.STREAMLIT, 958 } 959 KEYWORDS.pop("/*+") 960 961 SINGLE_TOKENS = { 962 **tokens.Tokenizer.SINGLE_TOKENS, 963 "$": TokenType.PARAMETER, 964 } 965 966 VAR_SINGLE_TOKENS = {"$"} 967 968 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 969 970 class Generator(generator.Generator): 971 PARAMETER_TOKEN = "$" 972 MATCHED_BY_SOURCE = False 973 SINGLE_STRING_INTERVAL = True 974 JOIN_HINTS = False 975 TABLE_HINTS = False 976 QUERY_HINTS = False 977 AGGREGATE_FILTER_SUPPORTED = False 978 SUPPORTS_TABLE_COPY = False 979 COLLATE_IS_FUNC = True 980 LIMIT_ONLY_LITERALS = True 981 JSON_KEY_VALUE_PAIR_SEP = "," 982 INSERT_OVERWRITE = " OVERWRITE INTO" 983 STRUCT_DELIMITER = ("(", ")") 984 COPY_PARAMS_ARE_WRAPPED = False 985 COPY_PARAMS_EQ_REQUIRED = True 986 STAR_EXCEPT = "EXCLUDE" 987 SUPPORTS_EXPLODING_PROJECTIONS = False 988 ARRAY_CONCAT_IS_VAR_LEN = False 989 SUPPORTS_CONVERT_TIMEZONE = True 990 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 991 SUPPORTS_MEDIAN = True 992 ARRAY_SIZE_NAME = "ARRAY_SIZE" 993 994 TRANSFORMS = { 995 **generator.Generator.TRANSFORMS, 996 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 997 exp.ArgMax: rename_func("MAX_BY"), 998 exp.ArgMin: rename_func("MIN_BY"), 999 exp.Array: inline_array_sql, 1000 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1001 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1002 exp.AtTimeZone: lambda self, e: self.func( 1003 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1004 ), 1005 exp.BitwiseOr: rename_func("BITOR"), 1006 exp.BitwiseXor: rename_func("BITXOR"), 1007 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1008 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1009 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1010 exp.DateAdd: date_delta_sql("DATEADD"), 1011 exp.DateDiff: date_delta_sql("DATEDIFF"), 1012 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1013 exp.DatetimeDiff: timestampdiff_sql, 1014 exp.DateStrToDate: datestrtodate_sql, 1015 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1016 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1017 exp.DayOfYear: rename_func("DAYOFYEAR"), 1018 exp.Explode: rename_func("FLATTEN"), 1019 exp.Extract: rename_func("DATE_PART"), 1020 exp.FileFormatProperty: lambda self, 1021 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1022 exp.FromTimeZone: lambda self, e: self.func( 1023 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1024 ), 1025 exp.GenerateSeries: lambda self, e: self.func( 1026 
"ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1027 ), 1028 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1029 exp.If: if_sql(name="IFF", false_value="NULL"), 1030 exp.JSONExtractArray: _json_extract_value_array_sql, 1031 exp.JSONExtractScalar: lambda self, e: self.func( 1032 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1033 ), 1034 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1035 exp.JSONPathRoot: lambda *_: "", 1036 exp.JSONValueArray: _json_extract_value_array_sql, 1037 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1038 rename_func("EDITDISTANCE") 1039 ), 1040 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1041 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1042 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1043 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1044 exp.MakeInterval: no_make_interval_sql, 1045 exp.Max: max_or_greatest, 1046 exp.Min: min_or_least, 1047 exp.ParseJSON: lambda self, e: self.func( 1048 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1049 ), 1050 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1051 exp.PercentileCont: transforms.preprocess( 1052 [transforms.add_within_group_for_percentiles] 1053 ), 1054 exp.PercentileDisc: transforms.preprocess( 1055 [transforms.add_within_group_for_percentiles] 1056 ), 1057 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1058 exp.RegexpExtract: _regexpextract_sql, 1059 exp.RegexpExtractAll: _regexpextract_sql, 1060 exp.RegexpILike: _regexpilike_sql, 1061 exp.Rand: rename_func("RANDOM"), 1062 exp.Select: transforms.preprocess( 1063 [ 1064 transforms.eliminate_distinct_on, 1065 transforms.explode_projection_to_unnest(), 1066 transforms.eliminate_semi_and_anti_joins, 1067 _transform_generate_date_array, 1068 ] 1069 ), 1070 exp.SHA: rename_func("SHA1"), 1071 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1072 exp.StartsWith: rename_func("STARTSWITH"), 1073 exp.StrPosition: lambda self, e: strposition_sql( 1074 self, e, func_name="CHARINDEX", supports_position=True 1075 ), 1076 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1077 exp.Stuff: rename_func("INSERT"), 1078 exp.TimeAdd: date_delta_sql("TIMEADD"), 1079 exp.Timestamp: no_timestamp_sql, 1080 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1081 exp.TimestampDiff: lambda self, e: self.func( 1082 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1083 ), 1084 exp.TimestampTrunc: timestamptrunc_sql(), 1085 exp.TimeStrToTime: timestrtotime_sql, 1086 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1087 exp.ToArray: rename_func("TO_ARRAY"), 1088 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1089 exp.ToDouble: rename_func("TO_DOUBLE"), 1090 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1091 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1092 exp.TsOrDsToDate: lambda self, e: self.func( 1093 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1094 ), 1095 exp.TsOrDsToTime: lambda self, e: self.func( 1096 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1097 ), 1098 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1099 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1100 exp.Uuid: rename_func("UUID_STRING"), 1101 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1102 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 
1103 exp.Xor: rename_func("BOOLXOR"), 1104 } 1105 1106 SUPPORTED_JSON_PATH_PARTS = { 1107 exp.JSONPathKey, 1108 exp.JSONPathRoot, 1109 exp.JSONPathSubscript, 1110 } 1111 1112 TYPE_MAPPING = { 1113 **generator.Generator.TYPE_MAPPING, 1114 exp.DataType.Type.NESTED: "OBJECT", 1115 exp.DataType.Type.STRUCT: "OBJECT", 1116 } 1117 1118 TOKEN_MAPPING = { 1119 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1120 } 1121 1122 PROPERTIES_LOCATION = { 1123 **generator.Generator.PROPERTIES_LOCATION, 1124 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1125 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1126 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1127 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1128 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1129 } 1130 1131 UNSUPPORTED_VALUES_EXPRESSIONS = { 1132 exp.Map, 1133 exp.StarMap, 1134 exp.Struct, 1135 exp.VarMap, 1136 } 1137 1138 def with_properties(self, properties: exp.Properties) -> str: 1139 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1140 1141 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1142 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1143 values_as_table = False 1144 1145 return super().values_sql(expression, values_as_table=values_as_table) 1146 1147 def datatype_sql(self, expression: exp.DataType) -> str: 1148 expressions = expression.expressions 1149 if ( 1150 expressions 1151 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1152 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1153 ): 1154 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1155 return "OBJECT" 1156 1157 return super().datatype_sql(expression) 1158 1159 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1160 return self.func( 1161 "TO_NUMBER", 1162 expression.this, 1163 expression.args.get("format"), 1164 expression.args.get("precision"), 1165 expression.args.get("scale"), 1166 ) 1167 1168 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1169 milli = expression.args.get("milli") 1170 if milli is not None: 1171 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1172 expression.set("nano", milli_to_nano) 1173 1174 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1175 1176 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1177 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1178 return self.func("TO_GEOGRAPHY", expression.this) 1179 if expression.is_type(exp.DataType.Type.GEOMETRY): 1180 return self.func("TO_GEOMETRY", expression.this) 1181 1182 return super().cast_sql(expression, safe_prefix=safe_prefix) 1183 1184 def trycast_sql(self, expression: exp.TryCast) -> str: 1185 value = expression.this 1186 1187 if value.type is None: 1188 from sqlglot.optimizer.annotate_types import annotate_types 1189 1190 value = annotate_types(value, dialect=self.dialect) 1191 1192 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1193 return super().trycast_sql(expression) 1194 1195 # TRY_CAST only works for string values in Snowflake 1196 return self.cast_sql(expression) 1197 1198 def log_sql(self, expression: exp.Log) -> str: 1199 if not expression.expression: 1200 return self.func("LN", expression.this) 1201 1202 return super().log_sql(expression) 1203 1204 def unnest_sql(self, expression: exp.Unnest) -> str: 1205 unnest_alias = expression.args.get("alias") 1206 offset = 
expression.args.get("offset") 1207 1208 columns = [ 1209 exp.to_identifier("seq"), 1210 exp.to_identifier("key"), 1211 exp.to_identifier("path"), 1212 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1213 seq_get(unnest_alias.columns if unnest_alias else [], 0) 1214 or exp.to_identifier("value"), 1215 exp.to_identifier("this"), 1216 ] 1217 1218 if unnest_alias: 1219 unnest_alias.set("columns", columns) 1220 else: 1221 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1222 1223 table_input = self.sql(expression.expressions[0]) 1224 if not table_input.startswith("INPUT =>"): 1225 table_input = f"INPUT => {table_input}" 1226 1227 explode = f"TABLE(FLATTEN({table_input}))" 1228 alias = self.sql(unnest_alias) 1229 alias = f" AS {alias}" if alias else "" 1230 return f"{explode}{alias}" 1231 1232 def show_sql(self, expression: exp.Show) -> str: 1233 terse = "TERSE " if expression.args.get("terse") else "" 1234 history = " HISTORY" if expression.args.get("history") else "" 1235 like = self.sql(expression, "like") 1236 like = f" LIKE {like}" if like else "" 1237 1238 scope = self.sql(expression, "scope") 1239 scope = f" {scope}" if scope else "" 1240 1241 scope_kind = self.sql(expression, "scope_kind") 1242 if scope_kind: 1243 scope_kind = f" IN {scope_kind}" 1244 1245 starts_with = self.sql(expression, "starts_with") 1246 if starts_with: 1247 starts_with = f" STARTS WITH {starts_with}" 1248 1249 limit = self.sql(expression, "limit") 1250 1251 from_ = self.sql(expression, "from") 1252 if from_: 1253 from_ = f" FROM {from_}" 1254 1255 privileges = self.expressions(expression, key="privileges", flat=True) 1256 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1257 1258 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1259 1260 def describe_sql(self, expression: exp.Describe) -> str: 1261 # Default to table if kind is unknown 1262 kind_value = expression.args.get("kind") or "TABLE" 1263 kind = f" {kind_value}" if kind_value else "" 1264 this = f" {self.sql(expression, 'this')}" 1265 expressions = self.expressions(expression, flat=True) 1266 expressions = f" {expressions}" if expressions else "" 1267 return f"DESCRIBE{kind}{this}{expressions}" 1268 1269 def generatedasidentitycolumnconstraint_sql( 1270 self, expression: exp.GeneratedAsIdentityColumnConstraint 1271 ) -> str: 1272 start = expression.args.get("start") 1273 start = f" START {start}" if start else "" 1274 increment = expression.args.get("increment") 1275 increment = f" INCREMENT {increment}" if increment else "" 1276 return f"AUTOINCREMENT{start}{increment}" 1277 1278 def cluster_sql(self, expression: exp.Cluster) -> str: 1279 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1280 1281 def struct_sql(self, expression: exp.Struct) -> str: 1282 keys = [] 1283 values = [] 1284 1285 for i, e in enumerate(expression.expressions): 1286 if isinstance(e, exp.PropertyEQ): 1287 keys.append( 1288 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1289 ) 1290 values.append(e.expression) 1291 else: 1292 keys.append(exp.Literal.string(f"_{i}")) 1293 values.append(e) 1294 1295 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1296 1297 @unsupported_args("weight", "accuracy") 1298 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1299 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1300 1301 def alterset_sql(self, 
expression: exp.AlterSet) -> str: 1302 exprs = self.expressions(expression, flat=True) 1303 exprs = f" {exprs}" if exprs else "" 1304 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1305 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1306 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1307 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1308 tag = self.expressions(expression, key="tag", flat=True) 1309 tag = f" TAG {tag}" if tag else "" 1310 1311 return f"SET{exprs}{file_format}{copy_options}{tag}" 1312 1313 def strtotime_sql(self, expression: exp.StrToTime): 1314 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1315 return self.func( 1316 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1317 ) 1318 1319 def timestampsub_sql(self, expression: exp.TimestampSub): 1320 return self.sql( 1321 exp.TimestampAdd( 1322 this=expression.this, 1323 expression=expression.expression * -1, 1324 unit=expression.unit, 1325 ) 1326 ) 1327 1328 def jsonextract_sql(self, expression: exp.JSONExtract): 1329 this = expression.this 1330 1331 # JSON strings are valid coming from other dialects such as BQ 1332 return self.func( 1333 "GET_PATH", 1334 exp.ParseJSON(this=this) if this.is_string else this, 1335 expression.expression, 1336 ) 1337 1338 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1339 this = expression.this 1340 if not isinstance(this, exp.TsOrDsToTimestamp): 1341 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1342 1343 return self.func("TO_CHAR", this, self.format_time(expression)) 1344 1345 def datesub_sql(self, expression: exp.DateSub) -> str: 1346 value = expression.expression 1347 if value: 1348 value.replace(value * (-1)) 1349 else: 1350 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1351 1352 return date_delta_sql("DATEADD")(self, expression) 1353 1354 def select_sql(self, expression: exp.Select) -> str: 1355 limit = expression.args.get("limit") 1356 offset = expression.args.get("offset") 1357 if offset and not limit: 1358 expression.limit(exp.Null(), copy=False) 1359 return super().select_sql(expression)
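Most of what this module defines is declarative: tables mapping Snowflake syntax onto sqlglot expression nodes (FUNCTIONS, TRANSFORMS, and so on). A minimal, illustrative sketch of those mappings in action, using the public transpile entry point (printed outputs are indicative):

import sqlglot

# Round trip through Snowflake: IFF is parsed into exp.If and generated back as IFF.
print(sqlglot.transpile("SELECT IFF(x > 0, 'pos', 'neg')", read="snowflake", write="snowflake")[0])

# Cross-dialect: the generic LEVENSHTEIN function is rendered with Snowflake's
# EDITDISTANCE name, per the exp.Levenshtein entry in TRANSFORMS above.
print(sqlglot.transpile("SELECT LEVENSHTEIN(a, b)", write="snowflake")[0])
# e.g. SELECT EDITDISTANCE(a, b)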
NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
Specifies the strategy according to which identifiers should be normalized.
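A short sketch of what the UPPERCASE strategy means in practice (Snowflake resolves unquoted identifiers case-insensitively and stores them in uppercase), using sqlglot's normalize_identifiers helper; the expected output is indicative:

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Unquoted identifiers are normalized to uppercase; quoted identifiers are
# case-sensitive in Snowflake and are therefore left untouched.
expr = sqlglot.parse_one('SELECT col_a, "col_b" FROM my_table', read="snowflake")
print(normalize_identifiers(expr, dialect="snowflake").sql(dialect="snowflake"))
# expected: SELECT COL_A, "col_b" FROM MY_TABLE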
NULL_ORDERING = "nulls_are_large"
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
PREFER_CTE_ALIAS_COLUMN = True
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.

For example,

WITH y(c) AS (
    SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
    SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
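The rewrite above happens during qualification; a minimal sketch, assuming the optimizer's qualify step (which honors this flag for dialects that set it):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
ast = sqlglot.parse_one(sql, read="snowflake")
# qualify() pushes the CTE alias column `c` onto the SUM(a) projection so the
# HAVING reference resolves, matching the rewritten form shown above.
print(qualify(ast, dialect="snowflake").sql(dialect="snowflake"))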
TIME_MAPPING
Associates this dialect's time formats with their equivalent Python strftime formats.
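This mapping is what translates Snowflake format tokens when transpiling; a sketch (the duckdb rendering shown in the comment is indicative):

import sqlglot

# Snowflake's 'yyyy-mm-dd' format tokens are mapped through their strftime
# equivalents ('%Y-%m-%d') on the way to another dialect.
print(sqlglot.transpile(
    "SELECT TO_TIMESTAMP('2024-01-01', 'yyyy-mm-dd')",
    read="snowflake",
    write="duckdb",
)[0])
# e.g. SELECT STRPTIME('2024-01-01', '%Y-%m-%d')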
    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.

Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
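A small check of the DUAL carve-out described above, calling the method directly:

from sqlglot import exp
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()
table = exp.to_table("dual")
# The Identifier sits inside a Table node, so the DUAL special case applies
# and quote_identifier() is a no-op.
print(dialect.quote_identifier(table.this, identify=True).sql(dialect="snowflake"))  # dual
# An ordinary identifier is quoted as requested.
print(dialect.quote_identifier(exp.to_identifier("col"), identify=True).sql(dialect="snowflake"))  # "col"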
UNESCAPED_SEQUENCES
Mapping of an escaped sequence ("\n") to its unescaped version (the literal newline character).
    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")
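Dropping "$" reflects that Snowflake GET_PATH paths carry no leading root marker; a sketch of the effect when the same path is generated for a JSONPath-style dialect (output indicative):

import sqlglot

# 'a.b[0]' has no '$' root on the Snowflake side; a JSONPath-based target
# dialect adds the root back when rendering the extracted path.
print(sqlglot.transpile("SELECT GET_PATH(j, 'a.b[0]')", read="snowflake", write="duckdb")[0])
# e.g. SELECT j -> '$.a.b[0]'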
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
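A minimal usage sketch for this parser via the dialect entry points (the round-tripped output in the comments is indicative):

import sqlglot

# Snowflake-only syntax such as SHOW TERSE ... is handled by the overrides in
# this class (see STATEMENT_PARSERS and SHOW_PARSERS in the module source above).
ast = sqlglot.parse_one("SHOW TERSE TABLES IN SCHEMA db.s", read="snowflake")
print(type(ast).__name__)            # Show
print(ast.sql(dialect="snowflake"))  # SHOW TERSE TABLES IN SCHEMA db.s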
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
class Tokenizer(tokens.Tokenizer):
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Generator(generator.Generator):
    PARAMETER_TOKEN = "$"
    MATCHED_BY_SOURCE = False
    SINGLE_STRING_INTERVAL = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    AGGREGATE_FILTER_SUPPORTED = False
    SUPPORTS_TABLE_COPY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    JSON_KEY_VALUE_PAIR_SEP = ","
    INSERT_OVERWRITE = " OVERWRITE INTO"
    STRUCT_DELIMITER = ("(", ")")
    COPY_PARAMS_ARE_WRAPPED = False
    COPY_PARAMS_EQ_REQUIRED = True
    STAR_EXCEPT = "EXCLUDE"
    SUPPORTS_EXPLODING_PROJECTIONS = False
    ARRAY_CONCAT_IS_VAR_LEN = False
    SUPPORTS_CONVERT_TIMEZONE = True
    EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
    SUPPORTS_MEDIAN = True
    ARRAY_SIZE_NAME = "ARRAY_SIZE"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: inline_array_sql,
        exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
        exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
        exp.AtTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), e.this
        ),
        exp.BitwiseOr: rename_func("BITOR"),
        exp.BitwiseXor: rename_func("BITXOR"),
        exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
        exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
        exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
        exp.DateAdd: date_delta_sql("DATEADD"),
        exp.DateDiff: date_delta_sql("DATEDIFF"),
        exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
        exp.DatetimeDiff: timestampdiff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.Explode: rename_func("FLATTEN"),
        exp.Extract: rename_func("DATE_PART"),
        exp.FileFormatProperty: lambda self,
        e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
        exp.FromTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
        ),
        exp.GenerateSeries: lambda self, e: self.func(
            "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
        ),
        exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
        exp.If: if_sql(name="IFF", false_value="NULL"),
        exp.JSONExtractArray: _json_extract_value_array_sql,
        exp.JSONExtractScalar: lambda self, e: self.func(
            "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
        ),
        exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
        exp.JSONPathRoot: lambda *_: "",
        exp.JSONValueArray: _json_extract_value_array_sql,
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
            rename_func("EDITDISTANCE")
        ),
        exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
        exp.LogicalAnd: rename_func("BOOLAND_AGG"),
        exp.LogicalOr: rename_func("BOOLOR_AGG"),
        exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.MakeInterval: no_make_interval_sql,
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.ParseJSON: lambda self, e: self.func(
            "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
        ),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.PercentileCont: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.PercentileDisc: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
        exp.RegexpExtract: _regexpextract_sql,
        exp.RegexpExtractAll: _regexpextract_sql,
        exp.RegexpILike: _regexpilike_sql,
        exp.Rand: rename_func("RANDOM"),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_distinct_on,
                transforms.explode_projection_to_unnest(),
                transforms.eliminate_semi_and_anti_joins,
                _transform_generate_date_array,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
        exp.StartsWith: rename_func("STARTSWITH"),
        exp.StrPosition: lambda self, e: strposition_sql(
            self, e, func_name="CHARINDEX", supports_position=True
        ),
        exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
        exp.Stuff: rename_func("INSERT"),
        exp.TimeAdd: date_delta_sql("TIMEADD"),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
        exp.TimestampDiff: lambda self, e: self.func(
            "TIMESTAMPDIFF", e.unit, e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
        exp.ToArray: rename_func("TO_ARRAY"),
        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        exp.ToDouble: rename_func("TO_DOUBLE"),
        exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
        exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
        exp.TsOrDsToDate: lambda self, e: self.func(
            "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
        ),
        exp.TsOrDsToTime: lambda self, e: self.func(
            "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
        ),
        exp.Unhex: rename_func("HEX_DECODE_BINARY"),
        exp.UnixToTime: rename_func("TO_TIMESTAMP"),
        exp.Uuid: rename_func("UUID_STRING"),
        exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: rename_func("BOOLXOR"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.NESTED: "OBJECT",
        exp.DataType.Type.STRUCT: "OBJECT",
    }

    TOKEN_MAPPING = {
        TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
        exp.LocationProperty: exp.Properties.Location.POST_WITH,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    UNSUPPORTED_VALUES_EXPRESSIONS = {
        exp.Map,
        exp.StarMap,
        exp.Struct,
        exp.VarMap,
    }

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

    def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
        if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
            values_as_table = False

        return super().values_sql(expression, values_as_table=values_as_table)

    def datatype_sql(self, expression: exp.DataType) -> str:
        expressions = expression.expressions
        if (
            expressions
            and expression.is_type(*exp.DataType.STRUCT_TYPES)
            and any(isinstance(field_type, exp.DataType) for field_type in expressions)
        ):
            # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
            return "OBJECT"

        return super().datatype_sql(expression)

    def tonumber_sql(self, expression: exp.ToNumber) -> str:
        return self.func(
            "TO_NUMBER",
            expression.this,
            expression.args.get("format"),
            expression.args.get("precision"),
            expression.args.get("scale"),
        )

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        milli = expression.args.get("milli")
        if milli is not None:
            milli_to_nano = milli.pop() * exp.Literal.number(1000000)
            expression.set("nano", milli_to_nano)

        return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        if expression.is_type(exp.DataType.Type.GEOGRAPHY):
            return self.func("TO_GEOGRAPHY", expression.this)
        if expression.is_type(exp.DataType.Type.GEOMETRY):
            return self.func("TO_GEOMETRY", expression.this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        value = expression.this

        if value.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            value = annotate_types(value, dialect=self.dialect)

        if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
            return super().trycast_sql(expression)

        # TRY_CAST only works for string values in Snowflake
        return self.cast_sql(expression)

    def log_sql(self, expression: exp.Log) -> str:
        if not expression.expression:
            return self.func("LN", expression.this)

        return super().log_sql(expression)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        unnest_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            seq_get(unnest_alias.columns if unnest_alias else [], 0)
            or exp.to_identifier("value"),
            exp.to_identifier("this"),
        ]

        if unnest_alias:
            unnest_alias.set("columns", columns)
        else:
            unnest_alias = exp.TableAlias(this="_u", columns=columns)

        table_input = self.sql(expression.expressions[0])
        if not table_input.startswith("INPUT =>"):
            table_input = f"INPUT => {table_input}"

        explode = f"TABLE(FLATTEN({table_input}))"
        alias = self.sql(unnest_alias)
        alias = f" AS {alias}" if alias else ""
        return f"{explode}{alias}"

    def show_sql(self, expression: exp.Show) -> str:
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""
        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from")
        if from_:
            from_ = f" FROM {from_}"

        privileges = self.expressions(expression, key="privileges", flat=True)
        privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"

    def describe_sql(self, expression: exp.Describe) -> str:
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"

    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""
        return f"AUTOINCREMENT{start}{increment}"

    def cluster_sql(self, expression: exp.Cluster) -> str:
        return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

    def struct_sql(self, expression: exp.Struct) -> str:
        keys = []
        values = []

        for i, e in enumerate(expression.expressions):
            if isinstance(e, exp.PropertyEQ):
                keys.append(
                    exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                )
                values.append(e.expression)
            else:
                keys.append(exp.Literal.string(f"_{i}"))
                values.append(e)

        return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

    @unsupported_args("weight", "accuracy")
    def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
        return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

    def alterset_sql(self, expression: exp.AlterSet) -> str:
        exprs = self.expressions(expression, flat=True)
        exprs = f" {exprs}" if exprs else ""
        file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
        file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
        copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
        copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
        tag = self.expressions(expression, key="tag", flat=True)
        tag = f" TAG {tag}" if tag else ""

        return f"SET{exprs}{file_format}{copy_options}{tag}"

    def strtotime_sql(self, expression: exp.StrToTime):
        safe_prefix = "TRY_" if expression.args.get("safe") else ""
        return self.func(
            f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
        )

    def timestampsub_sql(self, expression: exp.TimestampSub):
        return self.sql(
            exp.TimestampAdd(
                this=expression.this,
                expression=expression.expression * -1,
                unit=expression.unit,
            )
        )

    def jsonextract_sql(self, expression: exp.JSONExtract):
        this = expression.this

        # JSON strings are valid coming from other dialects such as BQ
        return self.func(
            "GET_PATH",
            exp.ParseJSON(this=this) if this.is_string else this,
            expression.expression,
        )

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this
        if not isinstance(this, exp.TsOrDsToTimestamp):
            this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

        return self.func("TO_CHAR", this, self.format_time(expression))

    def datesub_sql(self, expression: exp.DateSub) -> str:
        value = expression.expression
        if value:
            value.replace(value * (-1))
        else:
            self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

        return date_delta_sql("DATEADD")(self, expression)

    def select_sql(self, expression: exp.Select) -> str:
        limit = expression.args.get("limit")
        offset = expression.args.get("offset")
        if offset and not limit:
            expression.limit(exp.Null(), copy=False)
        return super().select_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
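Before the member-by-member listing, a minimal usage sketch of this generator through the public sqlglot API; the query and identifiers are made up for illustration:

import sqlglot

# transpile() returns a list of SQL strings. The Snowflake generator rewrites
# dialect-specific constructs, e.g. IF(...) becomes IFF(...) per the exp.If
# entry in TRANSFORMS above.
print(sqlglot.transpile("SELECT IF(x > 0, 'pos', NULL) FROM t", write="snowflake")[0])
# Expected output along the lines of: SELECT IFF(x > 0, 'pos', NULL) FROM t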
def datatype_sql(self, expression: exp.DataType) -> str:
    expressions = expression.expressions
    if (
        expressions
        and expression.is_type(*exp.DataType.STRUCT_TYPES)
        and any(isinstance(field_type, exp.DataType) for field_type in expressions)
    ):
        # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
        return "OBJECT"

    return super().datatype_sql(expression)
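A minimal sketch of the struct collapse above; the unnamed STRUCT<INT> type is an arbitrary choice:

from sqlglot import exp

# A struct whose fields are bare types (exp.DataType instances) cannot be
# expressed with Snowflake's OBJECT(<key> <type>) syntax, so it collapses
# to plain OBJECT.
print(exp.DataType.build("STRUCT<INT>").sql(dialect="snowflake"))
# Expected: OBJECT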
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    milli = expression.args.get("milli")
    if milli is not None:
        milli_to_nano = milli.pop() * exp.Literal.number(1000000)
        expression.set("nano", milli_to_nano)

    return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
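A sketch of the millisecond folding above, building the node directly; the argument names (year/month/day/hour/min/sec/milli) are assumed from exp.TimestampFromParts, and the literal values are arbitrary:

from sqlglot import exp

n = exp.Literal.number
node = exp.TimestampFromParts(
    year=n(2024), month=n(1), day=n(1), hour=n(0), min=n(0), sec=n(0), milli=n(500)
)
# milli is popped and re-attached as nano = milli * 1000000 before generation.
print(node.sql(dialect="snowflake"))
# Expected roughly: TIMESTAMP_FROM_PARTS(2024, 1, 1, 0, 0, 0, 500 * 1000000)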
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    if expression.is_type(exp.DataType.Type.GEOGRAPHY):
        return self.func("TO_GEOGRAPHY", expression.this)
    if expression.is_type(exp.DataType.Type.GEOMETRY):
        return self.func("TO_GEOMETRY", expression.this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
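The geospatial special-casing above can be observed with a plain transpile; the column name is illustrative:

import sqlglot

# Casts to GEOGRAPHY/GEOMETRY are rendered as conversion functions instead.
print(sqlglot.transpile("SELECT CAST(col AS GEOGRAPHY)", write="snowflake")[0])
# Expected: SELECT TO_GEOGRAPHY(col)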
def trycast_sql(self, expression: exp.TryCast) -> str:
    value = expression.this

    if value.type is None:
        from sqlglot.optimizer.annotate_types import annotate_types

        value = annotate_types(value, dialect=self.dialect)

    if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
        return super().trycast_sql(expression)

    # TRY_CAST only works for string values in Snowflake
    return self.cast_sql(expression)
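A sketch of the two branches above; annotate_types infers that 1 is numeric rather than text, so the second statement degrades to a plain CAST:

import sqlglot

print(sqlglot.transpile("SELECT TRY_CAST('1' AS INT)", write="snowflake")[0])
# Expected: SELECT TRY_CAST('1' AS INT)  (string operand, TRY_CAST is kept)
print(sqlglot.transpile("SELECT TRY_CAST(1 AS INT)", write="snowflake")[0])
# Expected: SELECT CAST(1 AS INT)  (non-string operand, falls back to CAST)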
def unnest_sql(self, expression: exp.Unnest) -> str:
    unnest_alias = expression.args.get("alias")
    offset = expression.args.get("offset")

    columns = [
        exp.to_identifier("seq"),
        exp.to_identifier("key"),
        exp.to_identifier("path"),
        offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
        seq_get(unnest_alias.columns if unnest_alias else [], 0)
        or exp.to_identifier("value"),
        exp.to_identifier("this"),
    ]

    if unnest_alias:
        unnest_alias.set("columns", columns)
    else:
        unnest_alias = exp.TableAlias(this="_u", columns=columns)

    table_input = self.sql(expression.expressions[0])
    if not table_input.startswith("INPUT =>"):
        table_input = f"INPUT => {table_input}"

    explode = f"TABLE(FLATTEN({table_input}))"
    alias = self.sql(unnest_alias)
    alias = f" AS {alias}" if alias else ""
    return f"{explode}{alias}"
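The FLATTEN rewrite above in action, reading a BigQuery-style UNNEST; the array literal is arbitrary:

import sqlglot

print(sqlglot.transpile("SELECT * FROM UNNEST([1, 2])", read="bigquery", write="snowflake")[0])
# Expected roughly:
# SELECT * FROM TABLE(FLATTEN(INPUT => [1, 2])) AS _u(seq, key, path, index, value, this)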
def show_sql(self, expression: exp.Show) -> str:
    terse = "TERSE " if expression.args.get("terse") else ""
    history = " HISTORY" if expression.args.get("history") else ""
    like = self.sql(expression, "like")
    like = f" LIKE {like}" if like else ""

    scope = self.sql(expression, "scope")
    scope = f" {scope}" if scope else ""

    scope_kind = self.sql(expression, "scope_kind")
    if scope_kind:
        scope_kind = f" IN {scope_kind}"

    starts_with = self.sql(expression, "starts_with")
    if starts_with:
        starts_with = f" STARTS WITH {starts_with}"

    limit = self.sql(expression, "limit")

    from_ = self.sql(expression, "from")
    if from_:
        from_ = f" FROM {from_}"

    privileges = self.expressions(expression, key="privileges", flat=True)
    privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

    return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"
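A round-trip sketch of the SHOW clause assembly above, assuming this particular SHOW form is accepted by the Snowflake parser; names are made up:

import sqlglot

sql = "SHOW TERSE TABLES LIKE '%orders%'"
print(sqlglot.parse_one(sql, read="snowflake").sql(dialect="snowflake"))
# Expected to round-trip to an equivalent SHOW statement.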
def describe_sql(self, expression: exp.Describe) -> str:
    # Default to table if kind is unknown
    kind_value = expression.args.get("kind") or "TABLE"
    kind = f" {kind_value}" if kind_value else ""
    this = f" {self.sql(expression, 'this')}"
    expressions = self.expressions(expression, flat=True)
    expressions = f" {expressions}" if expressions else ""
    return f"DESCRIBE{kind}{this}{expressions}"
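The kind defaulting above means a bare DESCRIBE gains an explicit TABLE keyword; the table name is made up:

import sqlglot

print(sqlglot.parse_one("DESCRIBE my_table", read="snowflake").sql(dialect="snowflake"))
# Expected: DESCRIBE TABLE my_table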
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    start = expression.args.get("start")
    start = f" START {start}" if start else ""
    increment = expression.args.get("increment")
    increment = f" INCREMENT {increment}" if increment else ""
    return f"AUTOINCREMENT{start}{increment}"
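A sketch of the AUTOINCREMENT rendering above, fed from a Postgres-style identity column; the DDL is illustrative:

import sqlglot

ddl = "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2))"
print(sqlglot.transpile(ddl, read="postgres", write="snowflake")[0])
# Expected roughly: CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 2)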
def struct_sql(self, expression: exp.Struct) -> str:
    keys = []
    values = []

    for i, e in enumerate(expression.expressions):
        if isinstance(e, exp.PropertyEQ):
            keys.append(
                exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
            )
            values.append(e.expression)
        else:
            keys.append(exp.Literal.string(f"_{i}"))
            values.append(e)

    return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
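The key/value flattening above, starting from a BigQuery struct literal; field names are arbitrary:

import sqlglot

print(sqlglot.transpile("SELECT STRUCT(1 AS a, 'x' AS b)", read="bigquery", write="snowflake")[0])
# Expected: SELECT OBJECT_CONSTRUCT('a', 1, 'b', 'x')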
def alterset_sql(self, expression: exp.AlterSet) -> str:
    exprs = self.expressions(expression, flat=True)
    exprs = f" {exprs}" if exprs else ""
    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
    tag = self.expressions(expression, key="tag", flat=True)
    tag = f" TAG {tag}" if tag else ""

    return f"SET{exprs}{file_format}{copy_options}{tag}"
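A round-trip sketch of the SET clause assembly, assuming the Snowflake parser accepts the TAG form; the tag name and value are made up:

import sqlglot

sql = "ALTER TABLE t SET TAG governance.pii='high'"
print(sqlglot.parse_one(sql, read="snowflake").sql(dialect="snowflake"))
# Expected to round-trip, with the tag rendered by the TAG branch above.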
def datesub_sql(self, expression: exp.DateSub) -> str:
    value = expression.expression
    if value:
        value.replace(value * (-1))
    else:
        self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

    return date_delta_sql("DATEADD")(self, expression)
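The sign flip above, fed from a BigQuery DATE_SUB; the exact rendering of the negated amount may vary:

import sqlglot

sql = "SELECT DATE_SUB(CURRENT_DATE, INTERVAL 3 DAY)"
print(sqlglot.transpile(sql, read="bigquery", write="snowflake")[0])
# Expected roughly: SELECT DATEADD(DAY, 3 * -1, CURRENT_DATE)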
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- put_sql