sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_timestamp_sql,
    strposition_sql,
    timestampdiff_sql,
    no_make_interval_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind))
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

        if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value:
            klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
            formatted_exp = build_formatted_time(klass, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)

        return binary_from_function(expr_type)(args)

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    ).as_(sequence_value_name)

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])
    unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))


def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into
            # ARRAY_LENGTH), the transformed Snowflake query is the following (it'll be
            # unnested properly on the next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) ->
            #   SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression


def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # inlineFK, the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # outoflineFK, explicitly names the columns
            return super()._parse_foreign_key()

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            explode = f"TABLE(FLATTEN({table_input}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        @unsupported_args("weight", "accuracy")
        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"

        def strtotime_sql(self, expression: exp.StrToTime):
            safe_prefix = "TRY_" if expression.args.get("safe") else ""
            return self.func(
                f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
            )

        def timestampsub_sql(self, expression: exp.TimestampSub):
            return self.sql(
                exp.TimestampAdd(
                    this=expression.this,
                    expression=expression.expression * -1,
                    unit=expression.unit,
                )
            )

        def jsonextract_sql(self, expression: exp.JSONExtract):
            this = expression.this

            # JSON strings are valid coming from other dialects such as BQ
            return self.func(
                "GET_PATH",
                exp.ParseJSON(this=this) if this.is_string else this,
                expression.expression,
            )

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if not isinstance(this, exp.TsOrDsToTimestamp):
                this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

            return self.func("TO_CHAR", this, self.format_time(expression))

        def datesub_sql(self, expression: exp.DateSub) -> str:
            value = expression.expression
            if value:
                value.replace(value * (-1))
            else:
                self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

            return date_delta_sql("DATEADD")(self, expression)

        def select_sql(self, expression: exp.Select) -> str:
            limit = expression.args.get("limit")
            offset = expression.args.get("offset")
            if offset and not limit:
                expression.limit(exp.Null(), copy=False)
            return super().select_sql(expression)
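As a usage sketch (not part of the module source), the parser-side builders above are exercised through sqlglot's public entry points parse_one and transpile with the registered dialect name "snowflake"; the exact rendered strings are indicative and may vary between sqlglot versions:

import sqlglot
from sqlglot import parse_one

# TO_TIMESTAMP(<col>, <fmt>) is parsed into exp.StrToTime by _build_datetime, so it
# can be transpiled to another dialect's string-to-time function (e.g. DuckDB's STRPTIME).
print(
    sqlglot.transpile(
        "SELECT TO_TIMESTAMP(col, 'yyyy-mm-dd hh24:mi:ss')",
        read="snowflake",
        write="duckdb",
    )[0]
)

# DIV0(a, b) is rewritten into a conditional by _build_if_from_div0 and rendered
# back in Snowflake itself as an IFF(...) expression (per the If transform above).
print(parse_one("SELECT DIV0(a, b)", read="snowflake").sql(dialect="snowflake"))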
name="ARRAY_CAT"), 896 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 897 exp.AtTimeZone: lambda self, e: self.func( 898 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 899 ), 900 exp.BitwiseOr: rename_func("BITOR"), 901 exp.BitwiseXor: rename_func("BITXOR"), 902 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 903 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 904 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 905 exp.DateAdd: date_delta_sql("DATEADD"), 906 exp.DateDiff: date_delta_sql("DATEDIFF"), 907 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 908 exp.DatetimeDiff: timestampdiff_sql, 909 exp.DateStrToDate: datestrtodate_sql, 910 exp.DayOfMonth: rename_func("DAYOFMONTH"), 911 exp.DayOfWeek: rename_func("DAYOFWEEK"), 912 exp.DayOfYear: rename_func("DAYOFYEAR"), 913 exp.Explode: rename_func("FLATTEN"), 914 exp.Extract: rename_func("DATE_PART"), 915 exp.FromTimeZone: lambda self, e: self.func( 916 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 917 ), 918 exp.GenerateSeries: lambda self, e: self.func( 919 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 920 ), 921 exp.GroupConcat: rename_func("LISTAGG"), 922 exp.If: if_sql(name="IFF", false_value="NULL"), 923 exp.JSONExtractArray: _json_extract_value_array_sql, 924 exp.JSONExtractScalar: lambda self, e: self.func( 925 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 926 ), 927 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 928 exp.JSONPathRoot: lambda *_: "", 929 exp.JSONValueArray: _json_extract_value_array_sql, 930 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 931 exp.LogicalOr: rename_func("BOOLOR_AGG"), 932 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 933 exp.MakeInterval: no_make_interval_sql, 934 exp.Max: max_or_greatest, 935 exp.Min: min_or_least, 936 exp.ParseJSON: lambda self, e: self.func( 937 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 938 ), 939 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 940 exp.PercentileCont: transforms.preprocess( 941 [transforms.add_within_group_for_percentiles] 942 ), 943 exp.PercentileDisc: transforms.preprocess( 944 [transforms.add_within_group_for_percentiles] 945 ), 946 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 947 exp.RegexpExtract: _regexpextract_sql, 948 exp.RegexpExtractAll: _regexpextract_sql, 949 exp.RegexpILike: _regexpilike_sql, 950 exp.Rand: rename_func("RANDOM"), 951 exp.Select: transforms.preprocess( 952 [ 953 transforms.eliminate_distinct_on, 954 transforms.explode_to_unnest(), 955 transforms.eliminate_semi_and_anti_joins, 956 _transform_generate_date_array, 957 ] 958 ), 959 exp.SHA: rename_func("SHA1"), 960 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 961 exp.StartsWith: rename_func("STARTSWITH"), 962 exp.StrPosition: lambda self, e: strposition_sql( 963 self, e, func_name="CHARINDEX", supports_position=True 964 ), 965 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 966 exp.Stuff: rename_func("INSERT"), 967 exp.TimeAdd: date_delta_sql("TIMEADD"), 968 exp.Timestamp: no_timestamp_sql, 969 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 970 exp.TimestampDiff: lambda self, e: self.func( 971 "TIMESTAMPDIFF", e.unit, e.expression, e.this 972 ), 973 exp.TimestampTrunc: timestamptrunc_sql(), 974 exp.TimeStrToTime: timestrtotime_sql, 975 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM 
{self.sql(e, 'this')})", 976 exp.ToArray: rename_func("TO_ARRAY"), 977 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 978 exp.ToDouble: rename_func("TO_DOUBLE"), 979 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 980 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 981 exp.TsOrDsToDate: lambda self, e: self.func( 982 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 983 ), 984 exp.TsOrDsToTime: lambda self, e: self.func( 985 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 986 ), 987 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 988 exp.Uuid: rename_func("UUID_STRING"), 989 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 990 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 991 exp.Xor: rename_func("BOOLXOR"), 992 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 993 rename_func("EDITDISTANCE") 994 ), 995 } 996 997 SUPPORTED_JSON_PATH_PARTS = { 998 exp.JSONPathKey, 999 exp.JSONPathRoot, 1000 exp.JSONPathSubscript, 1001 } 1002 1003 TYPE_MAPPING = { 1004 **generator.Generator.TYPE_MAPPING, 1005 exp.DataType.Type.NESTED: "OBJECT", 1006 exp.DataType.Type.STRUCT: "OBJECT", 1007 } 1008 1009 PROPERTIES_LOCATION = { 1010 **generator.Generator.PROPERTIES_LOCATION, 1011 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1012 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1013 } 1014 1015 UNSUPPORTED_VALUES_EXPRESSIONS = { 1016 exp.Map, 1017 exp.StarMap, 1018 exp.Struct, 1019 exp.VarMap, 1020 } 1021 1022 def with_properties(self, properties: exp.Properties) -> str: 1023 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1024 1025 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1026 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1027 values_as_table = False 1028 1029 return super().values_sql(expression, values_as_table=values_as_table) 1030 1031 def datatype_sql(self, expression: exp.DataType) -> str: 1032 expressions = expression.expressions 1033 if ( 1034 expressions 1035 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1036 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1037 ): 1038 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1039 return "OBJECT" 1040 1041 return super().datatype_sql(expression) 1042 1043 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1044 return self.func( 1045 "TO_NUMBER", 1046 expression.this, 1047 expression.args.get("format"), 1048 expression.args.get("precision"), 1049 expression.args.get("scale"), 1050 ) 1051 1052 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1053 milli = expression.args.get("milli") 1054 if milli is not None: 1055 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1056 expression.set("nano", milli_to_nano) 1057 1058 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1059 1060 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1061 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1062 return self.func("TO_GEOGRAPHY", expression.this) 1063 if expression.is_type(exp.DataType.Type.GEOMETRY): 1064 return self.func("TO_GEOMETRY", expression.this) 1065 1066 return super().cast_sql(expression, safe_prefix=safe_prefix) 1067 1068 def trycast_sql(self, expression: exp.TryCast) -> str: 1069 value = expression.this 1070 1071 if value.type is None: 1072 from sqlglot.optimizer.annotate_types import annotate_types 1073 
1074 value = annotate_types(value) 1075 1076 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1077 return super().trycast_sql(expression) 1078 1079 # TRY_CAST only works for string values in Snowflake 1080 return self.cast_sql(expression) 1081 1082 def log_sql(self, expression: exp.Log) -> str: 1083 if not expression.expression: 1084 return self.func("LN", expression.this) 1085 1086 return super().log_sql(expression) 1087 1088 def unnest_sql(self, expression: exp.Unnest) -> str: 1089 unnest_alias = expression.args.get("alias") 1090 offset = expression.args.get("offset") 1091 1092 columns = [ 1093 exp.to_identifier("seq"), 1094 exp.to_identifier("key"), 1095 exp.to_identifier("path"), 1096 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1097 seq_get(unnest_alias.columns if unnest_alias else [], 0) 1098 or exp.to_identifier("value"), 1099 exp.to_identifier("this"), 1100 ] 1101 1102 if unnest_alias: 1103 unnest_alias.set("columns", columns) 1104 else: 1105 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1106 1107 table_input = self.sql(expression.expressions[0]) 1108 if not table_input.startswith("INPUT =>"): 1109 table_input = f"INPUT => {table_input}" 1110 1111 explode = f"TABLE(FLATTEN({table_input}))" 1112 alias = self.sql(unnest_alias) 1113 alias = f" AS {alias}" if alias else "" 1114 return f"{explode}{alias}" 1115 1116 def show_sql(self, expression: exp.Show) -> str: 1117 terse = "TERSE " if expression.args.get("terse") else "" 1118 history = " HISTORY" if expression.args.get("history") else "" 1119 like = self.sql(expression, "like") 1120 like = f" LIKE {like}" if like else "" 1121 1122 scope = self.sql(expression, "scope") 1123 scope = f" {scope}" if scope else "" 1124 1125 scope_kind = self.sql(expression, "scope_kind") 1126 if scope_kind: 1127 scope_kind = f" IN {scope_kind}" 1128 1129 starts_with = self.sql(expression, "starts_with") 1130 if starts_with: 1131 starts_with = f" STARTS WITH {starts_with}" 1132 1133 limit = self.sql(expression, "limit") 1134 1135 from_ = self.sql(expression, "from") 1136 if from_: 1137 from_ = f" FROM {from_}" 1138 1139 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 1140 1141 def describe_sql(self, expression: exp.Describe) -> str: 1142 # Default to table if kind is unknown 1143 kind_value = expression.args.get("kind") or "TABLE" 1144 kind = f" {kind_value}" if kind_value else "" 1145 this = f" {self.sql(expression, 'this')}" 1146 expressions = self.expressions(expression, flat=True) 1147 expressions = f" {expressions}" if expressions else "" 1148 return f"DESCRIBE{kind}{this}{expressions}" 1149 1150 def generatedasidentitycolumnconstraint_sql( 1151 self, expression: exp.GeneratedAsIdentityColumnConstraint 1152 ) -> str: 1153 start = expression.args.get("start") 1154 start = f" START {start}" if start else "" 1155 increment = expression.args.get("increment") 1156 increment = f" INCREMENT {increment}" if increment else "" 1157 return f"AUTOINCREMENT{start}{increment}" 1158 1159 def cluster_sql(self, expression: exp.Cluster) -> str: 1160 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1161 1162 def struct_sql(self, expression: exp.Struct) -> str: 1163 keys = [] 1164 values = [] 1165 1166 for i, e in enumerate(expression.expressions): 1167 if isinstance(e, exp.PropertyEQ): 1168 keys.append( 1169 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1170 ) 1171 values.append(e.expression) 1172 
else: 1173 keys.append(exp.Literal.string(f"_{i}")) 1174 values.append(e) 1175 1176 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1177 1178 @unsupported_args("weight", "accuracy") 1179 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1180 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1181 1182 def alterset_sql(self, expression: exp.AlterSet) -> str: 1183 exprs = self.expressions(expression, flat=True) 1184 exprs = f" {exprs}" if exprs else "" 1185 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1186 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1187 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1188 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1189 tag = self.expressions(expression, key="tag", flat=True) 1190 tag = f" TAG {tag}" if tag else "" 1191 1192 return f"SET{exprs}{file_format}{copy_options}{tag}" 1193 1194 def strtotime_sql(self, expression: exp.StrToTime): 1195 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1196 return self.func( 1197 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1198 ) 1199 1200 def timestampsub_sql(self, expression: exp.TimestampSub): 1201 return self.sql( 1202 exp.TimestampAdd( 1203 this=expression.this, 1204 expression=expression.expression * -1, 1205 unit=expression.unit, 1206 ) 1207 ) 1208 1209 def jsonextract_sql(self, expression: exp.JSONExtract): 1210 this = expression.this 1211 1212 # JSON strings are valid coming from other dialects such as BQ 1213 return self.func( 1214 "GET_PATH", 1215 exp.ParseJSON(this=this) if this.is_string else this, 1216 expression.expression, 1217 ) 1218 1219 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1220 this = expression.this 1221 if not isinstance(this, exp.TsOrDsToTimestamp): 1222 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1223 1224 return self.func("TO_CHAR", this, self.format_time(expression)) 1225 1226 def datesub_sql(self, expression: exp.DateSub) -> str: 1227 value = expression.expression 1228 if value: 1229 value.replace(value * (-1)) 1230 else: 1231 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1232 1233 return date_delta_sql("DATEADD")(self, expression) 1234 1235 def select_sql(self, expression: exp.Select) -> str: 1236 limit = expression.args.get("limit") 1237 offset = expression.args.get("offset") 1238 if offset and not limit: 1239 expression.limit(exp.Null(), copy=False) 1240 return super().select_sql(expression)
NORMALIZATION_STRATEGY
Specifies the strategy according to which identifiers should be normalized.
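A minimal sketch of what this means for Snowflake, which resolves unquoted identifiers as uppercase (using the public normalize_identifier helper listed under Inherited Members below):

from sqlglot import exp
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()

# Unquoted identifiers are case-insensitive in Snowflake and normalize to uppercase.
print(dialect.normalize_identifier(exp.to_identifier("foo")).name)  # FOO

# Quoted identifiers are case-sensitive, so they are left untouched.
print(dialect.normalize_identifier(exp.to_identifier("foo", quoted=True)).name)  # foo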
NULL_ORDERING
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
PREFER_CTE_ALIAS_COLUMN
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.

For example,
    WITH y(c) AS (
        SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    ) SELECT c FROM y;
will be rewritten as
    WITH y(c) AS (
        SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    ) SELECT c FROM y;
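A rough sketch of where this flag takes effect: the rewrite shown above happens during qualification rather than parsing, so the exact output can vary by sqlglot version.

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"

# Qualifying under the Snowflake dialect pushes the CTE column alias `c`
# onto the SUM(a) projection, since PREFER_CTE_ALIAS_COLUMN is set.
print(qualify(sqlglot.parse_one(sql, read="snowflake"), dialect="snowflake").sql("snowflake"))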
TIME_MAPPING
Associates this dialect's time formats with their equivalent Python strftime formats.
def quote_identifier(self, expression: E, identify: bool = True) -> E:
    # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
    # unquoted DUAL keyword in a special way and does not map it to a user-defined table
    if (
        isinstance(expression, exp.Identifier)
        and isinstance(expression.parent, exp.Table)
        and expression.name.lower() == "dual"
    ):
        return expression  # type: ignore

    return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
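A small illustration of the DUAL carve-out described in the source above (a sketch, checked against the logic shown rather than every sqlglot release):

from sqlglot import exp
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()

# An identifier that sits inside a Table node and spells DUAL is never quoted...
table = exp.to_table("dual")
print(dialect.quote_identifier(table.this).sql("snowflake"))  # dual

# ...while an ordinary identifier is quoted as requested.
print(dialect.quote_identifier(exp.to_identifier("col")).sql("snowflake"))  # "col"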
Mapping of an escaped sequence (e.g. "\\n") to its unescaped version (a literal newline).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- PRESERVE_ORIGINAL_NAMES
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- NUMBERS_CAN_BE_UNDERSCORE_SEPARATED
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
    SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
    SINGLE_TOKENS.pop("$")
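The "$" root token is dropped here because JSON paths passed to Snowflake's GET_PATH carry no "$" prefix; correspondingly, exp.JSONPathRoot renders as an empty string in the Generator below. A hedged example of the resulting round-trip:

import sqlglot

# The DuckDB-style path '$.a.b' loses its root marker when written for Snowflake.
print(sqlglot.transpile("SELECT x -> '$.a.b' FROM t", read="duckdb", write="snowflake")[0])
# Expected shape: SELECT GET_PATH(x, 'a.b') FROM t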
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    IDENTIFY_PIVOT_STRINGS = True
    DEFAULT_SAMPLING_METHOD = "BERNOULLI"
    COLON_IS_VARIANT_EXTRACT = True

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.MATCH_CONDITION,
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
    TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
        "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
        "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
            this=seq_get(args, 1), expression=seq_get(args, 0)
        ),
        "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
            # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
            start=seq_get(args, 0),
            end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
            step=seq_get(args, 2),
        ),
        "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
        "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
        "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
        "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
        "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
        "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
        "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
        "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
        "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
        "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
        "DATE_TRUNC": _date_trunc_to_time,
        "DATEADD": _build_date_time_add(exp.DateAdd),
        "DATEDIFF": _build_datediff,
        "DIV0": _build_if_from_div0,
        "EDITDISTANCE": lambda args: exp.Levenshtein(
            this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
        ),
        "FLATTEN": exp.Explode.from_arg_list,
        "GET_PATH": lambda args, dialect: exp.JSONExtract(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        ),
        "IFF": exp.If.from_arg_list,
        "LAST_DAY": lambda args: exp.LastDay(
            this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
        ),
        "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "LISTAGG": exp.GroupConcat.from_arg_list,
        "NULLIFZERO": _build_if_from_nullifzero,
        "OBJECT_CONSTRUCT": _build_object_construct,
        "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
        "REGEXP_REPLACE": _build_regexp_replace,
        "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
        "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
        "RLIKE": exp.RegexpLike.from_arg_list,
        "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
        "TIMEADD": _build_date_time_add(exp.TimeAdd),
        "TIMEDIFF": _build_datediff,
        "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
        "TIMESTAMPDIFF": _build_datediff,
        "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
        "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
        "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
        "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
        "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
        "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
        "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
        "TRY_TO_TIMESTAMP": _build_datetime(
            "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
        ),
        "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
        "TO_NUMBER": lambda args: exp.ToNumber(
            this=seq_get(args, 0),
            format=seq_get(args, 1),
            precision=seq_get(args, 2),
            scale=seq_get(args, 3),
        ),
        "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
        "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
        "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
        "TO_VARCHAR": exp.ToChar.from_arg_list,
        "ZEROIFNULL": _build_if_from_zeroifnull,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "DATE_PART": lambda self: self._parse_date_part(),
        "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
    }
    FUNCTION_PARSERS.pop("TRIM")

    TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
        TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "UNSET": lambda self: self.expression(
            exp.Set,
            tag=self._match_text_seq("TAG"),
            expressions=self._parse_csv(self._parse_id_var),
            unset=True,
        ),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.SHOW: lambda self: self._parse_show(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "LOCATION": lambda self: self._parse_location_property(),
        "TAG": lambda self: self._parse_tag(),
    }

    TYPE_CONVERTERS = {
        # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
    }

    SHOW_PARSERS = {
        "SCHEMAS": _show_parser("SCHEMAS"),
        "TERSE SCHEMAS": _show_parser("SCHEMAS"),
        "OBJECTS": _show_parser("OBJECTS"),
        "TERSE OBJECTS": _show_parser("OBJECTS"),
        "TABLES": _show_parser("TABLES"),
        "TERSE TABLES": _show_parser("TABLES"),
        "VIEWS": _show_parser("VIEWS"),
        "TERSE VIEWS": _show_parser("VIEWS"),
        "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "SEQUENCES": _show_parser("SEQUENCES"),
        "TERSE SEQUENCES": _show_parser("SEQUENCES"),
        "COLUMNS": _show_parser("COLUMNS"),
        "USERS": _show_parser("USERS"),
        "TERSE USERS": _show_parser("USERS"),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "WITH": lambda self: self._parse_with_constraint(),
        "MASKING": lambda self: self._parse_with_constraint(),
        "PROJECTION": lambda self: self._parse_with_constraint(),
        "TAG": lambda self: self._parse_with_constraint(),
    }

    STAGED_FILE_SINGLE_TOKENS = {
        TokenType.DOT,
        TokenType.MOD,
        TokenType.SLASH,
    }

    FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

    SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

    NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

    LAMBDAS = {
        **parser.Parser.LAMBDAS,
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
        ),
    }

    def _negate_range(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        if not this:
            return this

        query = this.args.get("query")
        if isinstance(this, exp.In) and isinstance(query, exp.Query):
            # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
            # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
            # which can produce different results (most likely a Snowflake bug).
            #
            # https://docs.snowflake.com/en/sql-reference/functions/in
            # Context: https://github.com/tobymao/sqlglot/issues/3890
            return self.expression(
                exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
            )

        return self.expression(exp.Not, this=this)

    def _parse_tag(self) -> exp.Tags:
        return self.expression(
            exp.Tags,
            expressions=self._parse_wrapped_csv(self._parse_property),
        )

    def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
        if self._prev.token_type != TokenType.WITH:
            self._retreat(self._index - 1)

        if self._match_text_seq("MASKING", "POLICY"):
            policy = self._parse_column()
            return self.expression(
                exp.MaskingPolicyColumnConstraint,
                this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                expressions=self._match(TokenType.USING)
                and self._parse_wrapped_csv(self._parse_id_var),
            )
        if self._match_text_seq("PROJECTION", "POLICY"):
            policy = self._parse_column()
            return self.expression(
                exp.ProjectionPolicyColumnConstraint,
                this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
            )
        if self._match(TokenType.TAG):
            return self._parse_tag()

        return None

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.TAG):
            return self._parse_tag()

        return super()._parse_with_property()

    def _parse_create(self) -> exp.Create | exp.Command:
        expression = super()._parse_create()
        if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
            # Replace the Table node with the enclosed Identifier
            expression.this.replace(expression.this.this)

        return expression

    # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
    # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
    def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
        this = self._parse_var() or self._parse_type()

        if not this:
            return None

        self._match(TokenType.COMMA)
        expression = self._parse_bitwise()
        this = map_date_part(this)
        name = this.name.upper()

        if name.startswith("EPOCH"):
            if name == "EPOCH_MILLISECOND":
                scale = 10**3
            elif name == "EPOCH_MICROSECOND":
                scale = 10**6
            elif name == "EPOCH_NANOSECOND":
                scale = 10**9
            else:
                scale = None

            ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
            to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

            if scale:
                to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

            return to_unix

        return self.expression(exp.Extract, this=this, expression=expression)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        if is_map:
            # Keys are strings in Snowflake's objects, see also:
            # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
            # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
            return self._parse_slice(self._parse_string())

        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        lateral = super()._parse_lateral()
        if not lateral:
            return lateral

        if isinstance(lateral.this, exp.Explode):
            table_alias = lateral.args.get("alias")
            columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
            if table_alias and not table_alias.args.get("columns"):
                table_alias.set("columns", columns)
            elif not table_alias:
                exp.alias_(lateral, "_flattened", table=columns, copy=False)

        return lateral

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        # https://docs.snowflake.com/en/user-guide/querying-stage
        if self._match(TokenType.STRING, advance=False):
            table = self._parse_string()
        elif self._match_text_seq("@", advance=False):
            table = self._parse_location_path()
        else:
            table = None

        if table:
            file_format = None
            pattern = None

            wrapped = self._match(TokenType.L_PAREN)
            while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILE_FORMAT", "=>"):
                    file_format = self._parse_string() or super()._parse_table_parts(
                        is_db_reference=is_db_reference
                    )
                elif self._match_text_seq("PATTERN", "=>"):
                    pattern = self._parse_string()
                else:
                    break

                self._match(TokenType.COMMA)

            table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
        else:
            table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

        return table

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IDENTIFIER", "("):
            identifier = (
                super()._parse_id_var(any_token=any_token, tokens=tokens)
                or self._parse_string()
            )
            self._match_r_paren()
            return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

        return super()._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_show_snowflake(self, this: str) -> exp.Show:
        scope = None
        scope_kind = None

        # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
        # which is syntactically valid but has no effect on the output
        terse = self._tokens[self._index - 2].text.upper() == "TERSE"

        history = self._match_text_seq("HISTORY")

        like = self._parse_string() if self._match(TokenType.LIKE) else None

        if self._match(TokenType.IN):
            if self._match_text_seq("ACCOUNT"):
                scope_kind = "ACCOUNT"
            elif self._match_set(self.DB_CREATABLES):
                scope_kind = self._prev.text.upper()
                if self._curr:
                    scope = self._parse_table_parts()
            elif self._curr:
                scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                scope = self._parse_table_parts()

        return self.expression(
            exp.Show,
            **{
                "terse": terse,
                "this": this,
                "history": history,
                "like": like,
                "scope": scope,
                "scope_kind": scope_kind,
                "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                "limit": self._parse_limit(),
                "from": self._parse_string() if self._match(TokenType.FROM) else None,
            },
        )

    def _parse_location_property(self) -> exp.LocationProperty:
        self._match(TokenType.EQ)
        return self.expression(exp.LocationProperty, this=self._parse_location_path())

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Parse either a subquery or a staged file
        return (
            self._parse_select(table=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table_parts()
        )

    def _parse_location_path(self) -> exp.Var:
        parts = [self._advance_any(ignore_reserved=True)]

        # We avoid consuming a comma token because external tables like @foo and @bar
        # can be joined in a query with a comma separator, as well as closing paren
        # in case of subqueries
        while self._is_connected() and not self._match_set(
            (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
        ):
            parts.append(self._advance_any(ignore_reserved=True))

        return exp.var("".join(part.text for part in parts if part))

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        this = super()._parse_lambda_arg()

        if not this:
            return this

        typ = self._parse_types()

        if typ:
            return self.expression(exp.Cast, this=this, to=typ)

        return this

    def _parse_foreign_key(self) -> exp.ForeignKey:
        # inline FK, the REFERENCES columns are implied
        if self._match(TokenType.REFERENCES, advance=False):
            return self.expression(exp.ForeignKey)

        # out-of-line FK, explicitly names the columns
        return super()._parse_foreign_key()
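For example, the SHOW machinery above means Snowflake SHOW statements come back as structured exp.Show nodes rather than opaque commands (a minimal sketch):

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SHOW TERSE TABLES IN SCHEMA db1.schema1 LIMIT 10", read="snowflake")

assert isinstance(ast, exp.Show)
print(ast.name)               # TABLES
print(ast.args.get("terse"))  # True
print(ast.sql("snowflake"))   # SHOW TERSE TABLES IN SCHEMA db1.schema1 LIMIT 10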
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
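These knobs flow through the top-level parsing helpers, so error handling can be tuned per call; a sketch under the documented defaults:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # error_level defaults to IMMEDIATE, so invalid SQL raises right away.
    sqlglot.parse_one("SELECT 1 +", read="snowflake")
except ParseError as e:
    print(e.errors[0]["description"])

# WARN logs the problem instead of raising and returns a best-effort tree.
tree = sqlglot.parse_one("SELECT 1 +", read="snowflake", error_level=ErrorLevel.WARN)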
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Tokenizer(tokens.Tokenizer):
    STRING_ESCAPES = ["\\", "'"]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    RAW_STRINGS = ["$$"]
    COMMENTS = ["--", "//", ("/*", "*/")]
    NESTED_COMMENTS = False

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BYTEINT": TokenType.INT,
        "CHAR VARYING": TokenType.VARCHAR,
        "CHARACTER VARYING": TokenType.VARCHAR,
        "EXCLUDE": TokenType.EXCEPT,
        "ILIKE ANY": TokenType.ILIKE_ANY,
        "LIKE ANY": TokenType.LIKE_ANY,
        "MATCH_CONDITION": TokenType.MATCH_CONDITION,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "MINUS": TokenType.EXCEPT,
        "NCHAR VARYING": TokenType.VARCHAR,
        "PUT": TokenType.COMMAND,
        "REMOVE": TokenType.COMMAND,
        "RM": TokenType.COMMAND,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "SQL_DOUBLE": TokenType.DOUBLE,
        "SQL_VARCHAR": TokenType.VARCHAR,
        "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
        "TAG": TokenType.TAG,
        "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
        "TOP": TokenType.TOP,
        "WAREHOUSE": TokenType.WAREHOUSE,
        "STREAMLIT": TokenType.STREAMLIT,
    }
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    VAR_SINGLE_TOKENS = {"$"}

    COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
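A couple of the dialect quirks encoded above, shown end to end (a sketch; the default write dialect is used for output):

import sqlglot

# $$ ... $$ raw strings tokenize as plain string literals.
print(sqlglot.transpile("SELECT $$it's$$", read="snowflake")[0])  # SELECT 'it''s'

# MINUS is tokenized like EXCEPT.
print(sqlglot.transpile("SELECT a FROM t MINUS SELECT a FROM u", read="snowflake")[0])
# SELECT a FROM t EXCEPT SELECT a FROM u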
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
865 class Generator(generator.Generator): 866 PARAMETER_TOKEN = "$" 867 MATCHED_BY_SOURCE = False 868 SINGLE_STRING_INTERVAL = True 869 JOIN_HINTS = False 870 TABLE_HINTS = False 871 QUERY_HINTS = False 872 AGGREGATE_FILTER_SUPPORTED = False 873 SUPPORTS_TABLE_COPY = False 874 COLLATE_IS_FUNC = True 875 LIMIT_ONLY_LITERALS = True 876 JSON_KEY_VALUE_PAIR_SEP = "," 877 INSERT_OVERWRITE = " OVERWRITE INTO" 878 STRUCT_DELIMITER = ("(", ")") 879 COPY_PARAMS_ARE_WRAPPED = False 880 COPY_PARAMS_EQ_REQUIRED = True 881 STAR_EXCEPT = "EXCLUDE" 882 SUPPORTS_EXPLODING_PROJECTIONS = False 883 ARRAY_CONCAT_IS_VAR_LEN = False 884 SUPPORTS_CONVERT_TIMEZONE = True 885 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 886 SUPPORTS_MEDIAN = True 887 ARRAY_SIZE_NAME = "ARRAY_SIZE" 888 889 TRANSFORMS = { 890 **generator.Generator.TRANSFORMS, 891 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 892 exp.ArgMax: rename_func("MAX_BY"), 893 exp.ArgMin: rename_func("MIN_BY"), 894 exp.Array: inline_array_sql, 895 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 896 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 897 exp.AtTimeZone: lambda self, e: self.func( 898 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 899 ), 900 exp.BitwiseOr: rename_func("BITOR"), 901 exp.BitwiseXor: rename_func("BITXOR"), 902 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 903 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 904 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 905 exp.DateAdd: date_delta_sql("DATEADD"), 906 exp.DateDiff: date_delta_sql("DATEDIFF"), 907 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 908 exp.DatetimeDiff: timestampdiff_sql, 909 exp.DateStrToDate: datestrtodate_sql, 910 exp.DayOfMonth: rename_func("DAYOFMONTH"), 911 exp.DayOfWeek: rename_func("DAYOFWEEK"), 912 exp.DayOfYear: rename_func("DAYOFYEAR"), 913 exp.Explode: rename_func("FLATTEN"), 914 exp.Extract: rename_func("DATE_PART"), 915 exp.FromTimeZone: lambda self, e: self.func( 916 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 917 ), 918 exp.GenerateSeries: lambda self, e: self.func( 919 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 920 ), 921 exp.GroupConcat: rename_func("LISTAGG"), 922 exp.If: if_sql(name="IFF", false_value="NULL"), 923 exp.JSONExtractArray: _json_extract_value_array_sql, 924 exp.JSONExtractScalar: lambda self, e: self.func( 925 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 926 ), 927 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 928 exp.JSONPathRoot: lambda *_: "", 929 exp.JSONValueArray: _json_extract_value_array_sql, 930 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 931 exp.LogicalOr: rename_func("BOOLOR_AGG"), 932 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 933 exp.MakeInterval: no_make_interval_sql, 934 exp.Max: max_or_greatest, 935 exp.Min: min_or_least, 936 exp.ParseJSON: lambda self, e: self.func( 937 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 938 ), 939 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 940 exp.PercentileCont: transforms.preprocess( 941 [transforms.add_within_group_for_percentiles] 942 ), 943 exp.PercentileDisc: transforms.preprocess( 944 [transforms.add_within_group_for_percentiles] 945 ), 946 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 947 exp.RegexpExtract: _regexpextract_sql, 948 exp.RegexpExtractAll: _regexpextract_sql, 949 
exp.RegexpILike: _regexpilike_sql, 950 exp.Rand: rename_func("RANDOM"), 951 exp.Select: transforms.preprocess( 952 [ 953 transforms.eliminate_distinct_on, 954 transforms.explode_to_unnest(), 955 transforms.eliminate_semi_and_anti_joins, 956 _transform_generate_date_array, 957 ] 958 ), 959 exp.SHA: rename_func("SHA1"), 960 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 961 exp.StartsWith: rename_func("STARTSWITH"), 962 exp.StrPosition: lambda self, e: strposition_sql( 963 self, e, func_name="CHARINDEX", supports_position=True 964 ), 965 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 966 exp.Stuff: rename_func("INSERT"), 967 exp.TimeAdd: date_delta_sql("TIMEADD"), 968 exp.Timestamp: no_timestamp_sql, 969 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 970 exp.TimestampDiff: lambda self, e: self.func( 971 "TIMESTAMPDIFF", e.unit, e.expression, e.this 972 ), 973 exp.TimestampTrunc: timestamptrunc_sql(), 974 exp.TimeStrToTime: timestrtotime_sql, 975 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 976 exp.ToArray: rename_func("TO_ARRAY"), 977 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 978 exp.ToDouble: rename_func("TO_DOUBLE"), 979 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 980 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 981 exp.TsOrDsToDate: lambda self, e: self.func( 982 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 983 ), 984 exp.TsOrDsToTime: lambda self, e: self.func( 985 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 986 ), 987 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 988 exp.Uuid: rename_func("UUID_STRING"), 989 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 990 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 991 exp.Xor: rename_func("BOOLXOR"), 992 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 993 rename_func("EDITDISTANCE") 994 ), 995 } 996 997 SUPPORTED_JSON_PATH_PARTS = { 998 exp.JSONPathKey, 999 exp.JSONPathRoot, 1000 exp.JSONPathSubscript, 1001 } 1002 1003 TYPE_MAPPING = { 1004 **generator.Generator.TYPE_MAPPING, 1005 exp.DataType.Type.NESTED: "OBJECT", 1006 exp.DataType.Type.STRUCT: "OBJECT", 1007 } 1008 1009 PROPERTIES_LOCATION = { 1010 **generator.Generator.PROPERTIES_LOCATION, 1011 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1012 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1013 } 1014 1015 UNSUPPORTED_VALUES_EXPRESSIONS = { 1016 exp.Map, 1017 exp.StarMap, 1018 exp.Struct, 1019 exp.VarMap, 1020 } 1021 1022 def with_properties(self, properties: exp.Properties) -> str: 1023 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1024 1025 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1026 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1027 values_as_table = False 1028 1029 return super().values_sql(expression, values_as_table=values_as_table) 1030 1031 def datatype_sql(self, expression: exp.DataType) -> str: 1032 expressions = expression.expressions 1033 if ( 1034 expressions 1035 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1036 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1037 ): 1038 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1039 return "OBJECT" 1040 1041 return super().datatype_sql(expression) 1042 1043 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1044 return self.func( 1045 "TO_NUMBER", 1046 
            expression.this,
            expression.args.get("format"),
            expression.args.get("precision"),
            expression.args.get("scale"),
        )

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        milli = expression.args.get("milli")
        if milli is not None:
            milli_to_nano = milli.pop() * exp.Literal.number(1000000)
            expression.set("nano", milli_to_nano)

        return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        if expression.is_type(exp.DataType.Type.GEOGRAPHY):
            return self.func("TO_GEOGRAPHY", expression.this)
        if expression.is_type(exp.DataType.Type.GEOMETRY):
            return self.func("TO_GEOMETRY", expression.this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        value = expression.this

        if value.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            value = annotate_types(value)

        if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
            return super().trycast_sql(expression)

        # TRY_CAST only works for string values in Snowflake
        return self.cast_sql(expression)

    def log_sql(self, expression: exp.Log) -> str:
        if not expression.expression:
            return self.func("LN", expression.this)

        return super().log_sql(expression)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        unnest_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            seq_get(unnest_alias.columns if unnest_alias else [], 0)
            or exp.to_identifier("value"),
            exp.to_identifier("this"),
        ]

        if unnest_alias:
            unnest_alias.set("columns", columns)
        else:
            unnest_alias = exp.TableAlias(this="_u", columns=columns)

        table_input = self.sql(expression.expressions[0])
        if not table_input.startswith("INPUT =>"):
            table_input = f"INPUT => {table_input}"

        explode = f"TABLE(FLATTEN({table_input}))"
        alias = self.sql(unnest_alias)
        alias = f" AS {alias}" if alias else ""
        return f"{explode}{alias}"

    def show_sql(self, expression: exp.Show) -> str:
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""
        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from")
        if from_:
            from_ = f" FROM {from_}"

        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

    def describe_sql(self, expression: exp.Describe) -> str:
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"

    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""
        return f"AUTOINCREMENT{start}{increment}"

    def cluster_sql(self, expression: exp.Cluster) -> str:
        return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

    def struct_sql(self, expression: exp.Struct) -> str:
        keys = []
        values = []

        for i, e in enumerate(expression.expressions):
            if isinstance(e, exp.PropertyEQ):
                keys.append(
                    exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                )
                values.append(e.expression)
            else:
                keys.append(exp.Literal.string(f"_{i}"))
                values.append(e)

        return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

    @unsupported_args("weight", "accuracy")
    def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
        return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

    def alterset_sql(self, expression: exp.AlterSet) -> str:
        exprs = self.expressions(expression, flat=True)
        exprs = f" {exprs}" if exprs else ""
        file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
        file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
        copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
        copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
        tag = self.expressions(expression, key="tag", flat=True)
        tag = f" TAG {tag}" if tag else ""

        return f"SET{exprs}{file_format}{copy_options}{tag}"

    def strtotime_sql(self, expression: exp.StrToTime):
        safe_prefix = "TRY_" if expression.args.get("safe") else ""
        return self.func(
            f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
        )

    def timestampsub_sql(self, expression: exp.TimestampSub):
        return self.sql(
            exp.TimestampAdd(
                this=expression.this,
                expression=expression.expression * -1,
                unit=expression.unit,
            )
        )

    def jsonextract_sql(self, expression: exp.JSONExtract):
        this = expression.this

        # JSON strings are valid coming from other dialects such as BQ
        return self.func(
            "GET_PATH",
            exp.ParseJSON(this=this) if this.is_string else this,
            expression.expression,
        )

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this
        if not isinstance(this, exp.TsOrDsToTimestamp):
            this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

        return self.func("TO_CHAR", this, self.format_time(expression))

    def datesub_sql(self, expression: exp.DateSub) -> str:
        value = expression.expression
        if value:
            value.replace(value * (-1))
        else:
            self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

        return date_delta_sql("DATEADD")(self, expression)

    def select_sql(self, expression: exp.Select) -> str:
        limit = expression.args.get("limit")
        offset = expression.args.get("offset")
        if offset and not limit:
            expression.limit(exp.Null(), copy=False)
        return super().select_sql(expression)
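The select_sql override at the end of the class exists because Snowflake rejects an OFFSET that has no accompanying LIMIT. A minimal sketch of the resulting behavior (the read dialect is arbitrary and the exact output string is an expectation, not a guarantee):

import sqlglot

# OFFSET with no LIMIT is invalid in Snowflake, so the generator
# injects a LIMIT NULL before delegating to the base select_sql.
print(sqlglot.transpile("SELECT x FROM t OFFSET 10", read="postgres", write="snowflake")[0])
# Expected, roughly: SELECT x FROM t LIMIT NULL OFFSET 10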
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
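These options are plain keyword arguments, so they can be forwarded through sqlglot.transpile (or Expression.sql) rather than by constructing a Generator directly. A minimal sketch (the exact formatted output may vary across sqlglot versions):

import sqlglot

# Pretty-print with lowercase function names; both keyword arguments
# are forwarded to the Generator options documented above.
sql = sqlglot.transpile(
    "SELECT IFF(a > 0, 'pos', 'neg') AS sign FROM t",
    read="snowflake",
    write="snowflake",
    pretty=True,
    normalize_functions="lower",
)[0]
print(sql)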
def datatype_sql(self, expression: exp.DataType) -> str:
    expressions = expression.expressions
    if (
        expressions
        and expression.is_type(*exp.DataType.STRUCT_TYPES)
        and any(isinstance(field_type, exp.DataType) for field_type in expressions)
    ):
        # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
        return "OBJECT"

    return super().datatype_sql(expression)
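A struct type whose children are bare DataType nodes (unnamed fields) cannot be expressed with field types in this position, so it collapses to a plain OBJECT. A hedged example, assuming an unnamed-field STRUCT coming from BigQuery (output approximate):

import sqlglot

# STRUCT<INT64> has exp.DataType children, so the override emits OBJECT.
print(sqlglot.transpile("SELECT CAST(x AS STRUCT<INT64>)", read="bigquery", write="snowflake")[0])
# Expected, roughly: SELECT CAST(x AS OBJECT)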
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    milli = expression.args.get("milli")
    if milli is not None:
        milli_to_nano = milli.pop() * exp.Literal.number(1000000)
        expression.set("nano", milli_to_nano)

    return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
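Snowflake's TIMESTAMP_FROM_PARTS takes nanoseconds rather than milliseconds, hence the milli-to-nano rewrite. A sketch, assuming T-SQL's DATETIMEFROMPARTS as the source of the milli argument (output shape approximate):

import sqlglot

# The milliseconds argument is popped and re-attached as nano = milli * 1000000.
print(
    sqlglot.transpile(
        "SELECT DATETIMEFROMPARTS(2020, 1, 1, 12, 0, 0, 500)",
        read="tsql",
        write="snowflake",
    )[0]
)
# Expected, roughly: SELECT TIMESTAMP_FROM_PARTS(2020, 1, 1, 12, 0, 0, 500 * 1000000)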
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    if expression.is_type(exp.DataType.Type.GEOGRAPHY):
        return self.func("TO_GEOGRAPHY", expression.this)
    if expression.is_type(exp.DataType.Type.GEOMETRY):
        return self.func("TO_GEOMETRY", expression.this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
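Casts to the spatial types are rewritten into Snowflake's conversion functions instead of CAST syntax. For example (the output string is an expectation, not a guarantee):

import sqlglot

print(sqlglot.transpile("SELECT CAST(x AS GEOGRAPHY)", write="snowflake")[0])
# Expected, roughly: SELECT TO_GEOGRAPHY(x)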
def trycast_sql(self, expression: exp.TryCast) -> str:
    value = expression.this

    if value.type is None:
        from sqlglot.optimizer.annotate_types import annotate_types

        value = annotate_types(value)

    if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
        return super().trycast_sql(expression)

    # TRY_CAST only works for string values in Snowflake
    return self.cast_sql(expression)
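Because Snowflake's TRY_CAST only accepts string inputs, the override keeps TRY_CAST when the annotated value type is textual (or unknown) and falls back to a plain CAST otherwise. A sketch (outputs approximate):

import sqlglot

# String literal: stays a TRY_CAST.
print(sqlglot.transpile("SELECT TRY_CAST('10' AS INT)", write="snowflake")[0])
# Expected, roughly: SELECT TRY_CAST('10' AS INT)

# Integer literal: TRY_CAST would be invalid in Snowflake, so CAST is emitted.
print(sqlglot.transpile("SELECT TRY_CAST(10 AS VARCHAR)", write="snowflake")[0])
# Expected, roughly: SELECT CAST(10 AS VARCHAR)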
def unnest_sql(self, expression: exp.Unnest) -> str:
    unnest_alias = expression.args.get("alias")
    offset = expression.args.get("offset")

    columns = [
        exp.to_identifier("seq"),
        exp.to_identifier("key"),
        exp.to_identifier("path"),
        offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
        seq_get(unnest_alias.columns if unnest_alias else [], 0)
        or exp.to_identifier("value"),
        exp.to_identifier("this"),
    ]

    if unnest_alias:
        unnest_alias.set("columns", columns)
    else:
        unnest_alias = exp.TableAlias(this="_u", columns=columns)

    table_input = self.sql(expression.expressions[0])
    if not table_input.startswith("INPUT =>"):
        table_input = f"INPUT => {table_input}"

    explode = f"TABLE(FLATTEN({table_input}))"
    alias = self.sql(unnest_alias)
    alias = f" AS {alias}" if alias else ""
    return f"{explode}{alias}"
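UNNEST has no direct Snowflake equivalent, so it is rewritten to a FLATTEN table function with the canonical six-column alias built above. A sketch using a BigQuery-style array literal (the alias and column names come from the code; the exact output is approximate):

import sqlglot

print(
    sqlglot.transpile(
        "SELECT value FROM UNNEST([1, 2])",
        read="bigquery",
        write="snowflake",
    )[0]
)
# Expected, roughly:
# SELECT value FROM TABLE(FLATTEN(INPUT => [1, 2])) AS _u(seq, key, path, index, value, this)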
def show_sql(self, expression: exp.Show) -> str:
    terse = "TERSE " if expression.args.get("terse") else ""
    history = " HISTORY" if expression.args.get("history") else ""
    like = self.sql(expression, "like")
    like = f" LIKE {like}" if like else ""

    scope = self.sql(expression, "scope")
    scope = f" {scope}" if scope else ""

    scope_kind = self.sql(expression, "scope_kind")
    if scope_kind:
        scope_kind = f" IN {scope_kind}"

    starts_with = self.sql(expression, "starts_with")
    if starts_with:
        starts_with = f" STARTS WITH {starts_with}"

    limit = self.sql(expression, "limit")

    from_ = self.sql(expression, "from")
    if from_:
        from_ = f" FROM {from_}"

    return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
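Each optional piece (TERSE, LIKE, IN <scope>, STARTS WITH, LIMIT, FROM) is emitted only when present on the node, so SHOW statements round-trip. A hedged round-trip example, assuming the Snowflake parser accepts this SHOW variant:

from sqlglot import parse_one

ast = parse_one("SHOW TERSE TABLES LIKE '%foo%' IN SCHEMA db1.s1", read="snowflake")
print(ast.sql(dialect="snowflake"))
# Expected, roughly: SHOW TERSE TABLES LIKE '%foo%' IN SCHEMA db1.s1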
def describe_sql(self, expression: exp.Describe) -> str:
    # Default to table if kind is unknown
    kind_value = expression.args.get("kind") or "TABLE"
    kind = f" {kind_value}" if kind_value else ""
    this = f" {self.sql(expression, 'this')}"
    expressions = self.expressions(expression, flat=True)
    expressions = f" {expressions}" if expressions else ""
    return f"DESCRIBE{kind}{this}{expressions}"
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    start = expression.args.get("start")
    start = f" START {start}" if start else ""
    increment = expression.args.get("increment")
    increment = f" INCREMENT {increment}" if increment else ""
    return f"AUTOINCREMENT{start}{increment}"
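Identity columns are rendered with Snowflake's AUTOINCREMENT keyword, carrying over START and INCREMENT when they are known. A sketch from T-SQL (output approximate):

import sqlglot

print(
    sqlglot.transpile(
        "CREATE TABLE t (id INT IDENTITY(1, 2))",
        read="tsql",
        write="snowflake",
    )[0]
)
# Expected, roughly: CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 2)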
def struct_sql(self, expression: exp.Struct) -> str:
    keys = []
    values = []

    for i, e in enumerate(expression.expressions):
        if isinstance(e, exp.PropertyEQ):
            keys.append(
                exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
            )
            values.append(e.expression)
        else:
            keys.append(exp.Literal.string(f"_{i}"))
            values.append(e)

    return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
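Struct values become OBJECT_CONSTRUCT calls: named fields keep their names as string keys, and unnamed fields get positional _<i> keys. A sketch from BigQuery (output approximate):

import sqlglot

print(sqlglot.transpile("SELECT STRUCT(1 AS a, 2 AS b)", read="bigquery", write="snowflake")[0])
# Expected, roughly: SELECT OBJECT_CONSTRUCT('a', 1, 'b', 2)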
def alterset_sql(self, expression: exp.AlterSet) -> str:
    exprs = self.expressions(expression, flat=True)
    exprs = f" {exprs}" if exprs else ""
    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
    tag = self.expressions(expression, key="tag", flat=True)
    tag = f" TAG {tag}" if tag else ""

    return f"SET{exprs}{file_format}{copy_options}{tag}"
def datesub_sql(self, expression: exp.DateSub) -> str:
    value = expression.expression
    if value:
        value.replace(value * (-1))
    else:
        self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

    return date_delta_sql("DATEADD")(self, expression)
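Snowflake has no DATESUB, so the delta is negated in place and rendered through DATEADD. A sketch from MySQL; the 3 * -1 form mirrors the value.replace(value * (-1)) rewrite above (output approximate):

import sqlglot

print(
    sqlglot.transpile(
        "SELECT DATE_SUB(d, INTERVAL 3 DAY)",
        read="mysql",
        write="snowflake",
    )[0]
)
# Expected, roughly: SELECT DATEADD(DAY, 3 * -1, d)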
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql