sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    Version,
    approx_count_distinct_sql,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    count_if_to_sum,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    strposition_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]

WINDOW_FUNCS_WITH_IGNORE_NULLS = (
    exp.FirstValue,
    exp.LastValue,
    exp.Lag,
    exp.Lead,
    exp.NthValue,
)


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    expr = expression.expression
    interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit)

    return f"{self.sql(this)} {op} {self.sql(interval)}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], exp.Show]:
    def _parse(self: DuckDB.Parser) -> exp.Show:
        return self._parse_show_duckdb(*args, **kwargs)

    return _parse


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB.
    # The transformation to ROW will take place if:
    #  1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    #  2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
    return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "DETACH": TokenType.DETACH,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        SHOW_PARSERS = {
            "TABLES": _show_parser("TABLES"),
            "ALL TABLES": _show_parser("ALL TABLES"),
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "GENERATE_SERIES": _build_generate_series(),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "RANGE": _build_generate_series(end_exclusive=True),
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TIME_BUCKET": exp.DateBin.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            **dict.fromkeys(
                ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
            ),
        }
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
            "@": lambda self: exp.Abs(this=self._parse_bitwise()),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ATTACH: lambda self: self._parse_attach_detach(),
            TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. foo: 1
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
                self._match(TokenType.COLON)
                comments = self._prev_comments or []

                this = self._parse_assignment()
                if isinstance(this, exp.Expression):
                    # Moves the comment next to the alias in `alias: expr /* comment */`
                    comments += this.pop_comments() or []

                return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            return super()._parse_expression()

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. FROM foo: bar
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                )
                self._match(TokenType.COLON)
                comments = self._prev_comments or []
            else:
                alias = None
                comments = []

            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
                # Moves the comment next to the alias in `alias: table /* comment */`
                comments += table.pop_comments() or []
                alias.comments = alias.pop_comments() + comments
                table.set("alias", alias)

            return table

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
                # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

        def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )

        def _parse_show_duckdb(self, this: str) -> exp.Show:
            return self.expression(exp.Show, this=this)

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpILike: lambda self, e: self.func(
                "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
            ),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: strposition_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
            exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.DateBin: rename_func("TIME_BUCKET"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.JSONB: "JSON",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def show_sql(self, expression: exp.Show) -> str:
            return f"SHOW {expression.name}"

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def countif_sql(self, expression: exp.CountIf) -> str:
            if self.dialect.version >= Version("1.2"):
                return self.function_fallback_sql(expression)

            # https://github.com/tobymao/sqlglot/pull/4749
            return count_if_to_sum(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if self.dialect.version >= Version("1.2"):
                return super().bracket_sql(expression)

            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this, dialect=self.dialect)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
                .else_(
                    exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty, e.g. when transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from
            # Snowflake, we generate STRUCT_PACK to build it, since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)

        def autoincrementcolumnconstraint_sql(self, _) -> str:
            self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
            return ""
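
This module registers itself as sqlglot's "duckdb" dialect, so the transforms above can be exercised end to end through the public sqlglot.transpile API. Below is a minimal sketch; the input queries are illustrative, and the printed SQL is indicative rather than pinned to a specific sqlglot release:

import sqlglot

# BigQuery's GENERATE_DATE_ARRAY is rewritten to GENERATE_SERIES and then cast
# back to a DATE array, per _generate_datetime_array_sql above.
print(
    sqlglot.transpile(
        "SELECT GENERATE_DATE_ARRAY('2024-01-01', '2024-01-07')",
        read="bigquery",
        write="duckdb",
    )[0]
)

# BigQuery's DATE_DIFF(end, start, unit) takes the unit last and computes
# end - start; _build_date_diff/_date_diff_sql reorder the arguments into
# DuckDB's DATE_DIFF(unit, start, end) form.
print(
    sqlglot.transpile(
        "SELECT DATE_DIFF(end_date, start_date, DAY)",
        read="bigquery",
        write="duckdb",
    )[0]
)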
isinstance(expression.parent, exp.UserDefinedFunction): 968 return self.sql(expression, "this") 969 return super().columndef_sql(expression, sep) 970 971 def join_sql(self, expression: exp.Join) -> str: 972 if ( 973 expression.side == "LEFT" 974 and not expression.args.get("on") 975 and isinstance(expression.this, exp.Unnest) 976 ): 977 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 978 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 979 return super().join_sql(expression.on(exp.true())) 980 981 return super().join_sql(expression) 982 983 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 984 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 985 if expression.args.get("is_end_exclusive"): 986 return rename_func("RANGE")(self, expression) 987 988 return self.function_fallback_sql(expression) 989 990 def countif_sql(self, expression: exp.CountIf) -> str: 991 if self.dialect.version >= Version("1.2"): 992 return self.function_fallback_sql(expression) 993 994 # https://github.com/tobymao/sqlglot/pull/4749 995 return count_if_to_sum(self, expression) 996 997 def bracket_sql(self, expression: exp.Bracket) -> str: 998 if self.dialect.version >= Version("1.2"): 999 return super().bracket_sql(expression) 1000 1001 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1002 this = expression.this 1003 if isinstance(this, exp.Array): 1004 this.replace(exp.paren(this)) 1005 1006 bracket = super().bracket_sql(expression) 1007 1008 if not expression.args.get("returns_list_for_maps"): 1009 if not this.type: 1010 from sqlglot.optimizer.annotate_types import annotate_types 1011 1012 this = annotate_types(this, dialect=self.dialect) 1013 1014 if this.is_type(exp.DataType.Type.MAP): 1015 bracket = f"({bracket})[1]" 1016 1017 return bracket 1018 1019 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1020 expression_sql = self.sql(expression, "expression") 1021 1022 func = expression.this 1023 if isinstance(func, exp.PERCENTILES): 1024 # Make the order key the first arg and slide the fraction to the right 1025 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1026 order_col = expression.find(exp.Ordered) 1027 if order_col: 1028 func.set("expression", func.this) 1029 func.set("this", order_col.this) 1030 1031 this = self.sql(expression, "this").rstrip(")") 1032 1033 return f"{this}{expression_sql})" 1034 1035 def length_sql(self, expression: exp.Length) -> str: 1036 arg = expression.this 1037 1038 # Dialects like BQ and Snowflake also accept binary values as args, so 1039 # DDB will attempt to infer the type or resort to case/when resolution 1040 if not expression.args.get("binary") or arg.is_string: 1041 return self.func("LENGTH", arg) 1042 1043 if not arg.type: 1044 from sqlglot.optimizer.annotate_types import annotate_types 1045 1046 arg = annotate_types(arg, dialect=self.dialect) 1047 1048 if arg.is_type(*exp.DataType.TEXT_TYPES): 1049 return self.func("LENGTH", arg) 1050 1051 # We need these casts to make duckdb's static type checker happy 1052 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1053 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1054 1055 case = ( 1056 exp.case(self.func("TYPEOF", arg)) 1057 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1058 .else_( 1059 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1060 ) # anonymous to break length_sql recursion 1061 ) 1062 1063 return self.sql(case) 1064 1065 def objectinsert_sql(self, expression: 
exp.ObjectInsert) -> str: 1066 this = expression.this 1067 key = expression.args.get("key") 1068 key_sql = key.name if isinstance(key, exp.Expression) else "" 1069 value_sql = self.sql(expression, "value") 1070 1071 kv_sql = f"{key_sql} := {value_sql}" 1072 1073 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1074 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1075 if isinstance(this, exp.Struct) and not this.expressions: 1076 return self.func("STRUCT_PACK", kv_sql) 1077 1078 return self.func("STRUCT_INSERT", this, kv_sql) 1079 1080 def unnest_sql(self, expression: exp.Unnest) -> str: 1081 explode_array = expression.args.get("explode_array") 1082 if explode_array: 1083 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1084 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1085 expression.expressions.append( 1086 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1087 ) 1088 1089 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1090 alias = expression.args.get("alias") 1091 if alias: 1092 expression.set("alias", None) 1093 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1094 1095 unnest_sql = super().unnest_sql(expression) 1096 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1097 return self.sql(select) 1098 1099 return super().unnest_sql(expression) 1100 1101 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1102 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 1103 # DuckDB should render IGNORE NULLS only for the general-purpose 1104 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1105 return super().ignorenulls_sql(expression) 1106 1107 return self.sql(expression, "this") 1108 1109 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1110 this = self.sql(expression, "this") 1111 null_text = self.sql(expression, "null") 1112 1113 if null_text: 1114 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1115 1116 return self.func("ARRAY_TO_STRING", this, expression.expression) 1117 1118 @unsupported_args("position", "occurrence") 1119 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1120 group = expression.args.get("group") 1121 params = expression.args.get("parameters") 1122 1123 # Do not render group if there is no following argument, 1124 # and it's the default value for this dialect 1125 if ( 1126 not params 1127 and group 1128 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1129 ): 1130 group = None 1131 return self.func( 1132 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1133 ) 1134 1135 @unsupported_args("culture") 1136 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1137 fmt = expression.args.get("format") 1138 if fmt and fmt.is_int: 1139 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1140 1141 self.unsupported("Only integer formats are supported by NumberToStr") 1142 return self.function_fallback_sql(expression) 1143 1144 def autoincrementcolumnconstraint_sql(self, _) -> str: 1145 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1146 return ""
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
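These two fragments describe Dialect-level settings that drive transpilation decisions. A minimal inspection sketch, assuming the corresponding attributes are NULL_ORDERING and CONCAT_COALESCE on sqlglot's Dialect base class:

```python
from sqlglot.dialects.dialect import Dialect

# Attribute names assumed from sqlglot's Dialect base class
for name in ("duckdb", "mysql", "tsql"):
    dialect = Dialect.get_or_raise(name)
    print(name, dialect.NULL_ORDERING, dialect.CONCAT_COALESCE)
```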
Whether ORDER BY ALL is supported (it expands to all the selected columns), as in DuckDB and Spark3/Databricks.
Whether expressions such as x::INT[5] should be parsed as fixed-size array definitions/casts (e.g. in DuckDB). In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.
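For illustration, the same `::INT[3]` text parses differently per dialect; the output shapes below are indicative, not guaranteed:

```python
import sqlglot

# DuckDB: a cast to a fixed-size array type
print(sqlglot.parse_one("SELECT x::INT[3]", read="duckdb").sql("duckdb"))
# Snowflake: a cast followed by a subscript/index operation
print(sqlglot.parse_one("SELECT x::INT[3]", read="snowflake").sql("snowflake"))
```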
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether number literals can include underscores for better readability.
Specifies the strategy according to which identifiers should be normalized.
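DuckDB resolves unquoted identifiers case-insensitively, so identifier normalization lowercases them; a short sketch using the optimizer's normalize_identifiers helper:

```python
import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Unquoted identifiers are rewritten per the dialect's normalization
# strategy (assumed case-insensitive, i.e. lowercase, for DuckDB)
expression = sqlglot.parse_one("SELECT FOO FROM Bar", read="duckdb")
print(normalize_identifiers(expression, dialect="duckdb").sql("duckdb"))
```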
301 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 302 if isinstance(path, exp.Literal): 303 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 304 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 305 # This check ensures we'll avoid trying to parse these as JSON paths, which can 306 # either result in a noisy warning or in an invalid representation of the path. 307 path_text = path.name 308 if path_text.startswith("/") or "[#" in path_text: 309 return path 310 311 return super().to_json_path(path)
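A quick check of the behavior above: a `$.`-style path is converted into a structured JSON path, while DuckDB's pointer syntax is passed through verbatim:

```python
import sqlglot

# Standard JSONPath is parsed into JSONPath parts...
print(sqlglot.parse_one("SELECT j -> '$.a'", read="duckdb").sql("duckdb"))
# ...but pointer-style ('/a/0') and back-of-list ('[#-1]') paths are kept
# as opaque literals to avoid a noisy warning or a lossy representation
print(sqlglot.parse_one("SELECT j -> '/a/0'", read="duckdb").sql("duckdb"))
```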
313 class Tokenizer(tokens.Tokenizer): 314 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 315 HEREDOC_STRINGS = ["$"] 316 317 HEREDOC_TAG_IS_IDENTIFIER = True 318 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 319 320 KEYWORDS = { 321 **tokens.Tokenizer.KEYWORDS, 322 "//": TokenType.DIV, 323 "**": TokenType.DSTAR, 324 "^@": TokenType.CARET_AT, 325 "@>": TokenType.AT_GT, 326 "<@": TokenType.LT_AT, 327 "ATTACH": TokenType.ATTACH, 328 "BINARY": TokenType.VARBINARY, 329 "BITSTRING": TokenType.BIT, 330 "BPCHAR": TokenType.TEXT, 331 "CHAR": TokenType.TEXT, 332 "CHARACTER VARYING": TokenType.TEXT, 333 "DETACH": TokenType.DETACH, 334 "EXCLUDE": TokenType.EXCEPT, 335 "LOGICAL": TokenType.BOOLEAN, 336 "ONLY": TokenType.ONLY, 337 "PIVOT_WIDER": TokenType.PIVOT, 338 "POSITIONAL": TokenType.POSITIONAL, 339 "SIGNED": TokenType.INT, 340 "STRING": TokenType.TEXT, 341 "SUMMARIZE": TokenType.SUMMARIZE, 342 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 343 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 344 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 345 "TIMESTAMP_US": TokenType.TIMESTAMP, 346 "UBIGINT": TokenType.UBIGINT, 347 "UINTEGER": TokenType.UINT, 348 "USMALLINT": TokenType.USMALLINT, 349 "UTINYINT": TokenType.UTINYINT, 350 "VARCHAR": TokenType.TEXT, 351 } 352 KEYWORDS.pop("/*+") 353 354 SINGLE_TOKENS = { 355 **tokens.Tokenizer.SINGLE_TOKENS, 356 "$": TokenType.PARAMETER, 357 } 358 359 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
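For example, the DuckDB-specific operators registered above surface as dedicated expression nodes:

```python
import sqlglot
from sqlglot import exp

# '//' tokenizes as TokenType.DIV (integer division), and '**' as
# TokenType.DSTAR, which the Parser below maps to exponentiation
assert sqlglot.parse_one("SELECT 7 // 2", read="duckdb").find(exp.IntDiv)
assert sqlglot.parse_one("SELECT 2 ** 10", read="duckdb").find(exp.Pow)
```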
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
361 class Parser(parser.Parser): 362 BITWISE = { 363 **parser.Parser.BITWISE, 364 TokenType.TILDA: exp.RegexpLike, 365 } 366 BITWISE.pop(TokenType.CARET) 367 368 RANGE_PARSERS = { 369 **parser.Parser.RANGE_PARSERS, 370 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 371 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 372 } 373 374 EXPONENT = { 375 **parser.Parser.EXPONENT, 376 TokenType.CARET: exp.Pow, 377 TokenType.DSTAR: exp.Pow, 378 } 379 380 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 381 382 SHOW_PARSERS = { 383 "TABLES": _show_parser("TABLES"), 384 "ALL TABLES": _show_parser("ALL TABLES"), 385 } 386 387 FUNCTIONS = { 388 **parser.Parser.FUNCTIONS, 389 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 390 "ARRAY_SORT": exp.SortArray.from_arg_list, 391 "DATEDIFF": _build_date_diff, 392 "DATE_DIFF": _build_date_diff, 393 "DATE_TRUNC": date_trunc_to_time, 394 "DATETRUNC": date_trunc_to_time, 395 "DECODE": lambda args: exp.Decode( 396 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 397 ), 398 "EDITDIST3": exp.Levenshtein.from_arg_list, 399 "ENCODE": lambda args: exp.Encode( 400 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 401 ), 402 "EPOCH": exp.TimeToUnix.from_arg_list, 403 "EPOCH_MS": lambda args: exp.UnixToTime( 404 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 405 ), 406 "GENERATE_SERIES": _build_generate_series(), 407 "JSON": exp.ParseJSON.from_arg_list, 408 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 409 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 410 "LIST_HAS": exp.ArrayContains.from_arg_list, 411 "LIST_REVERSE_SORT": _build_sort_array_desc, 412 "LIST_SORT": exp.SortArray.from_arg_list, 413 "LIST_VALUE": lambda args: exp.Array(expressions=args), 414 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 415 "MAKE_TIMESTAMP": _build_make_timestamp, 416 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 417 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 418 "RANGE": _build_generate_series(end_exclusive=True), 419 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 420 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 421 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 422 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 423 this=seq_get(args, 0), 424 expression=seq_get(args, 1), 425 replacement=seq_get(args, 2), 426 modifiers=seq_get(args, 3), 427 ), 428 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 429 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 430 "STRING_SPLIT": exp.Split.from_arg_list, 431 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 432 "STRING_TO_ARRAY": exp.Split.from_arg_list, 433 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 434 "STRUCT_PACK": exp.Struct.from_arg_list, 435 "STR_SPLIT": exp.Split.from_arg_list, 436 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 437 "TIME_BUCKET": exp.DateBin.from_arg_list, 438 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 439 "UNNEST": exp.Explode.from_arg_list, 440 "XOR": binary_from_function(exp.BitwiseXor), 441 } 442 443 FUNCTIONS.pop("DATE_SUB") 444 FUNCTIONS.pop("GLOB") 445 446 FUNCTION_PARSERS = { 447 **parser.Parser.FUNCTION_PARSERS, 448 **dict.fromkeys( 449 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 450 ), 451 } 452 FUNCTION_PARSERS.pop("DECODE") 453 454 NO_PAREN_FUNCTION_PARSERS = { 455 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 456 "MAP": 
lambda self: self._parse_map(), 457 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 458 } 459 460 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 461 TokenType.SEMI, 462 TokenType.ANTI, 463 } 464 465 PLACEHOLDER_PARSERS = { 466 **parser.Parser.PLACEHOLDER_PARSERS, 467 TokenType.PARAMETER: lambda self: ( 468 self.expression(exp.Placeholder, this=self._prev.text) 469 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 470 else None 471 ), 472 } 473 474 TYPE_CONVERTERS = { 475 # https://duckdb.org/docs/sql/data_types/numeric 476 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 477 # https://duckdb.org/docs/sql/data_types/text 478 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 479 } 480 481 STATEMENT_PARSERS = { 482 **parser.Parser.STATEMENT_PARSERS, 483 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 484 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 485 TokenType.SHOW: lambda self: self._parse_show(), 486 } 487 488 def _parse_expression(self) -> t.Optional[exp.Expression]: 489 # DuckDB supports prefix aliases, e.g. foo: 1 490 if self._next and self._next.token_type == TokenType.COLON: 491 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 492 self._match(TokenType.COLON) 493 comments = self._prev_comments or [] 494 495 this = self._parse_assignment() 496 if isinstance(this, exp.Expression): 497 # Moves the comment next to the alias in `alias: expr /* comment */` 498 comments += this.pop_comments() or [] 499 500 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 501 502 return super()._parse_expression() 503 504 def _parse_table( 505 self, 506 schema: bool = False, 507 joins: bool = False, 508 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 509 parse_bracket: bool = False, 510 is_db_reference: bool = False, 511 parse_partition: bool = False, 512 ) -> t.Optional[exp.Expression]: 513 # DuckDB supports prefix aliases, e.g. 
FROM foo: bar 514 if self._next and self._next.token_type == TokenType.COLON: 515 alias = self._parse_table_alias( 516 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 517 ) 518 self._match(TokenType.COLON) 519 comments = self._prev_comments or [] 520 else: 521 alias = None 522 comments = [] 523 524 table = super()._parse_table( 525 schema=schema, 526 joins=joins, 527 alias_tokens=alias_tokens, 528 parse_bracket=parse_bracket, 529 is_db_reference=is_db_reference, 530 parse_partition=parse_partition, 531 ) 532 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 533 # Moves the comment next to the alias in `alias: table /* comment */` 534 comments += table.pop_comments() or [] 535 alias.comments = alias.pop_comments() + comments 536 table.set("alias", alias) 537 538 return table 539 540 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 541 # https://duckdb.org/docs/sql/samples.html 542 sample = super()._parse_table_sample(as_modifier=as_modifier) 543 if sample and not sample.args.get("method"): 544 if sample.args.get("size"): 545 sample.set("method", exp.var("RESERVOIR")) 546 else: 547 sample.set("method", exp.var("SYSTEM")) 548 549 return sample 550 551 def _parse_bracket( 552 self, this: t.Optional[exp.Expression] = None 553 ) -> t.Optional[exp.Expression]: 554 bracket = super()._parse_bracket(this) 555 556 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 557 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 558 bracket.set("returns_list_for_maps", True) 559 560 return bracket 561 562 def _parse_map(self) -> exp.ToMap | exp.Map: 563 if self._match(TokenType.L_BRACE, advance=False): 564 return self.expression(exp.ToMap, this=self._parse_bracket()) 565 566 args = self._parse_wrapped_csv(self._parse_assignment) 567 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 568 569 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 570 return self._parse_field_def() 571 572 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 573 if len(aggregations) == 1: 574 return super()._pivot_column_names(aggregations) 575 return pivot_column_names(aggregations, dialect="duckdb") 576 577 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 578 def _parse_attach_option() -> exp.AttachOption: 579 return self.expression( 580 exp.AttachOption, 581 this=self._parse_var(any_token=True), 582 expression=self._parse_field(any_token=True), 583 ) 584 585 self._match(TokenType.DATABASE) 586 exists = self._parse_exists(not_=is_attach) 587 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 588 589 if self._match(TokenType.L_PAREN, advance=False): 590 expressions = self._parse_wrapped_csv(_parse_attach_option) 591 else: 592 expressions = None 593 594 return ( 595 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 596 if is_attach 597 else self.expression(exp.Detach, this=this, exists=exists) 598 ) 599 600 def _parse_show_duckdb(self, this: str) -> exp.Show: 601 return self.expression(exp.Show, this=this)
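A short sketch of the prefix-alias support implemented by `_parse_expression` and `_parse_table` above; both forms round-trip into ordinary AS aliases:

```python
import sqlglot

# DuckDB's `alias: expr` projection form...
print(sqlglot.parse_one("SELECT foo: 1", read="duckdb").sql("duckdb"))
# ...and the `FROM table: alias` form, rendered here for another dialect
print(sqlglot.parse_one("SELECT * FROM tbl: t", read="duckdb").sql("postgres"))
```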
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
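These options flow through the top-level parsing entry points; a minimal example exercising error_level and inspecting the collected errors:

```python
import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # ErrorLevel.RAISE accumulates up to max_errors messages before raising
    sqlglot.parse_one("SELECT 1 +", read="duckdb", error_level=ErrorLevel.RAISE)
except ParseError as e:
    print(e.errors)
```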
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
603 class Generator(generator.Generator): 604 PARAMETER_TOKEN = "$" 605 NAMED_PLACEHOLDER_TOKEN = "$" 606 JOIN_HINTS = False 607 TABLE_HINTS = False 608 QUERY_HINTS = False 609 LIMIT_FETCH = "LIMIT" 610 STRUCT_DELIMITER = ("(", ")") 611 RENAME_TABLE_WITH_DB = False 612 NVL2_SUPPORTED = False 613 SEMI_ANTI_JOIN_WITH_SIDE = False 614 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 615 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 616 LAST_DAY_SUPPORTS_DATE_PART = False 617 JSON_KEY_VALUE_PAIR_SEP = "," 618 IGNORE_NULLS_IN_FUNC = True 619 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 620 SUPPORTS_CREATE_TABLE_LIKE = False 621 MULTI_ARG_DISTINCT = False 622 CAN_IMPLEMENT_ARRAY_ANY = True 623 SUPPORTS_TO_NUMBER = False 624 COPY_HAS_INTO_KEYWORD = False 625 STAR_EXCEPT = "EXCLUDE" 626 PAD_FILL_PATTERN_IS_REQUIRED = True 627 ARRAY_CONCAT_IS_VAR_LEN = False 628 ARRAY_SIZE_DIM_REQUIRED = False 629 630 TRANSFORMS = { 631 **generator.Generator.TRANSFORMS, 632 exp.ApproxDistinct: approx_count_distinct_sql, 633 exp.Array: inline_array_unless_query, 634 exp.ArrayFilter: rename_func("LIST_FILTER"), 635 exp.ArraySort: _array_sort_sql, 636 exp.ArraySum: rename_func("LIST_SUM"), 637 exp.BitwiseXor: rename_func("XOR"), 638 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 639 exp.CurrentDate: lambda *_: "CURRENT_DATE", 640 exp.CurrentTime: lambda *_: "CURRENT_TIME", 641 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 642 exp.DayOfMonth: rename_func("DAYOFMONTH"), 643 exp.DayOfWeek: rename_func("DAYOFWEEK"), 644 exp.DayOfWeekIso: rename_func("ISODOW"), 645 exp.DayOfYear: rename_func("DAYOFYEAR"), 646 exp.DataType: _datatype_sql, 647 exp.Date: _date_sql, 648 exp.DateAdd: _date_delta_sql, 649 exp.DateFromParts: rename_func("MAKE_DATE"), 650 exp.DateSub: _date_delta_sql, 651 exp.DateDiff: _date_diff_sql, 652 exp.DateStrToDate: datestrtodate_sql, 653 exp.Datetime: no_datetime_sql, 654 exp.DatetimeSub: _date_delta_sql, 655 exp.DatetimeAdd: _date_delta_sql, 656 exp.DateToDi: lambda self, 657 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 658 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 659 exp.DiToDate: lambda self, 660 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 661 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 662 exp.GenerateDateArray: _generate_datetime_array_sql, 663 exp.GenerateTimestampArray: _generate_datetime_array_sql, 664 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 665 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 666 exp.Explode: rename_func("UNNEST"), 667 exp.IntDiv: lambda self, e: self.binary(e, "//"), 668 exp.IsInf: rename_func("ISINF"), 669 exp.IsNan: rename_func("ISNAN"), 670 exp.JSONBExists: rename_func("JSON_EXISTS"), 671 exp.JSONExtract: _arrow_json_extract_sql, 672 exp.JSONExtractArray: _json_extract_value_array_sql, 673 exp.JSONExtractScalar: _arrow_json_extract_sql, 674 exp.JSONFormat: _json_format_sql, 675 exp.JSONValueArray: _json_extract_value_array_sql, 676 exp.Lateral: explode_to_unnest_sql, 677 exp.LogicalOr: rename_func("BOOL_OR"), 678 exp.LogicalAnd: rename_func("BOOL_AND"), 679 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 680 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 681 exp.MonthsBetween: lambda self, e: self.func( 682 "DATEDIFF", 683 "'month'", 684 exp.cast(e.expression, 
exp.DataType.Type.TIMESTAMP, copy=True), 685 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 686 ), 687 exp.PercentileCont: rename_func("QUANTILE_CONT"), 688 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 689 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 690 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 691 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 692 exp.RegexpReplace: lambda self, e: self.func( 693 "REGEXP_REPLACE", 694 e.this, 695 e.expression, 696 e.args.get("replacement"), 697 e.args.get("modifiers"), 698 ), 699 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 700 exp.RegexpILike: lambda self, e: self.func( 701 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 702 ), 703 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 704 exp.Return: lambda self, e: self.sql(e, "this"), 705 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 706 exp.Rand: rename_func("RANDOM"), 707 exp.SHA: rename_func("SHA1"), 708 exp.SHA2: sha256_sql, 709 exp.Split: rename_func("STR_SPLIT"), 710 exp.SortArray: _sort_array_sql, 711 exp.StrPosition: strposition_sql, 712 exp.StrToUnix: lambda self, e: self.func( 713 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 714 ), 715 exp.Struct: _struct_sql, 716 exp.Transform: rename_func("LIST_TRANSFORM"), 717 exp.TimeAdd: _date_delta_sql, 718 exp.Time: no_time_sql, 719 exp.TimeDiff: _timediff_sql, 720 exp.Timestamp: no_timestamp_sql, 721 exp.TimestampDiff: lambda self, e: self.func( 722 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 723 ), 724 exp.TimestampTrunc: timestamptrunc_sql(), 725 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 726 exp.TimeStrToTime: timestrtotime_sql, 727 exp.TimeStrToUnix: lambda self, e: self.func( 728 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 729 ), 730 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 731 exp.TimeToUnix: rename_func("EPOCH"), 732 exp.TsOrDiToDi: lambda self, 733 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 734 exp.TsOrDsAdd: _date_delta_sql, 735 exp.TsOrDsDiff: lambda self, e: self.func( 736 "DATE_DIFF", 737 f"'{e.args.get('unit') or 'DAY'}'", 738 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 739 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 740 ), 741 exp.UnixToStr: lambda self, e: self.func( 742 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 743 ), 744 exp.DatetimeTrunc: lambda self, e: self.func( 745 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 746 ), 747 exp.UnixToTime: _unix_to_time_sql, 748 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 749 exp.VariancePop: rename_func("VAR_POP"), 750 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 751 exp.Xor: bool_xor_sql, 752 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 753 rename_func("LEVENSHTEIN") 754 ), 755 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 756 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 757 exp.DateBin: rename_func("TIME_BUCKET"), 758 } 759 760 SUPPORTED_JSON_PATH_PARTS = { 761 exp.JSONPathKey, 762 exp.JSONPathRoot, 763 exp.JSONPathSubscript, 764 exp.JSONPathWildcard, 765 } 766 767 TYPE_MAPPING = { 768 **generator.Generator.TYPE_MAPPING, 769 exp.DataType.Type.BINARY: "BLOB", 
770 exp.DataType.Type.BPCHAR: "TEXT", 771 exp.DataType.Type.CHAR: "TEXT", 772 exp.DataType.Type.DATETIME: "TIMESTAMP", 773 exp.DataType.Type.FLOAT: "REAL", 774 exp.DataType.Type.JSONB: "JSON", 775 exp.DataType.Type.NCHAR: "TEXT", 776 exp.DataType.Type.NVARCHAR: "TEXT", 777 exp.DataType.Type.UINT: "UINTEGER", 778 exp.DataType.Type.VARBINARY: "BLOB", 779 exp.DataType.Type.ROWVERSION: "BLOB", 780 exp.DataType.Type.VARCHAR: "TEXT", 781 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 782 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 783 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 784 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 785 } 786 787 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 788 RESERVED_KEYWORDS = { 789 "array", 790 "analyse", 791 "union", 792 "all", 793 "when", 794 "in_p", 795 "default", 796 "create_p", 797 "window", 798 "asymmetric", 799 "to", 800 "else", 801 "localtime", 802 "from", 803 "end_p", 804 "select", 805 "current_date", 806 "foreign", 807 "with", 808 "grant", 809 "session_user", 810 "or", 811 "except", 812 "references", 813 "fetch", 814 "limit", 815 "group_p", 816 "leading", 817 "into", 818 "collate", 819 "offset", 820 "do", 821 "then", 822 "localtimestamp", 823 "check_p", 824 "lateral_p", 825 "current_role", 826 "where", 827 "asc_p", 828 "placing", 829 "desc_p", 830 "user", 831 "unique", 832 "initially", 833 "column", 834 "both", 835 "some", 836 "as", 837 "any", 838 "only", 839 "deferrable", 840 "null_p", 841 "current_time", 842 "true_p", 843 "table", 844 "case", 845 "trailing", 846 "variadic", 847 "for", 848 "on", 849 "distinct", 850 "false_p", 851 "not", 852 "constraint", 853 "current_timestamp", 854 "returning", 855 "primary", 856 "intersect", 857 "having", 858 "analyze", 859 "current_user", 860 "and", 861 "cast", 862 "symmetric", 863 "using", 864 "order", 865 "current_catalog", 866 } 867 868 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 869 870 # DuckDB doesn't generally support CREATE TABLE .. properties 871 # https://duckdb.org/docs/sql/statements/create_table.html 872 PROPERTIES_LOCATION = { 873 prop: exp.Properties.Location.UNSUPPORTED 874 for prop in generator.Generator.PROPERTIES_LOCATION 875 } 876 877 # There are a few exceptions (e.g. 
temporary tables) which are supported or 878 # can be transpiled to DuckDB, so we explicitly override them accordingly 879 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 880 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 881 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 882 883 def show_sql(self, expression: exp.Show) -> str: 884 return f"SHOW {expression.name}" 885 886 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 887 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 888 889 def strtotime_sql(self, expression: exp.StrToTime) -> str: 890 if expression.args.get("safe"): 891 formatted_time = self.format_time(expression) 892 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 893 return str_to_time_sql(self, expression) 894 895 def strtodate_sql(self, expression: exp.StrToDate) -> str: 896 if expression.args.get("safe"): 897 formatted_time = self.format_time(expression) 898 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 899 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 900 901 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 902 arg = expression.this 903 if expression.args.get("safe"): 904 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 905 return self.func("JSON", arg) 906 907 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 908 nano = expression.args.get("nano") 909 if nano is not None: 910 expression.set( 911 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 912 ) 913 914 return rename_func("MAKE_TIME")(self, expression) 915 916 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 917 sec = expression.args["sec"] 918 919 milli = expression.args.get("milli") 920 if milli is not None: 921 sec += milli.pop() / exp.Literal.number(1000.0) 922 923 nano = expression.args.get("nano") 924 if nano is not None: 925 sec += nano.pop() / exp.Literal.number(1000000000.0) 926 927 if milli or nano: 928 expression.set("sec", sec) 929 930 return rename_func("MAKE_TIMESTAMP")(self, expression) 931 932 def tablesample_sql( 933 self, 934 expression: exp.TableSample, 935 tablesample_keyword: t.Optional[str] = None, 936 ) -> str: 937 if not isinstance(expression.parent, exp.Select): 938 # This sample clause only applies to a single source, not the entire resulting relation 939 tablesample_keyword = "TABLESAMPLE" 940 941 if expression.args.get("size"): 942 method = expression.args.get("method") 943 if method and method.name.upper() != "RESERVOIR": 944 self.unsupported( 945 f"Sampling method {method} is not supported with a discrete sample count, " 946 "defaulting to reservoir sampling" 947 ) 948 expression.set("method", exp.var("RESERVOIR")) 949 950 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 951 952 def interval_sql(self, expression: exp.Interval) -> str: 953 multiplier: t.Optional[int] = None 954 unit = expression.text("unit").lower() 955 956 if unit.startswith("week"): 957 multiplier = 7 958 if unit.startswith("quarter"): 959 multiplier = 90 960 961 if multiplier: 962 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 963 964 return super().interval_sql(expression) 965 966 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 967 if 
isinstance(expression.parent, exp.UserDefinedFunction): 968 return self.sql(expression, "this") 969 return super().columndef_sql(expression, sep) 970 971 def join_sql(self, expression: exp.Join) -> str: 972 if ( 973 expression.side == "LEFT" 974 and not expression.args.get("on") 975 and isinstance(expression.this, exp.Unnest) 976 ): 977 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 978 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 979 return super().join_sql(expression.on(exp.true())) 980 981 return super().join_sql(expression) 982 983 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 984 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 985 if expression.args.get("is_end_exclusive"): 986 return rename_func("RANGE")(self, expression) 987 988 return self.function_fallback_sql(expression) 989 990 def countif_sql(self, expression: exp.CountIf) -> str: 991 if self.dialect.version >= Version("1.2"): 992 return self.function_fallback_sql(expression) 993 994 # https://github.com/tobymao/sqlglot/pull/4749 995 return count_if_to_sum(self, expression) 996 997 def bracket_sql(self, expression: exp.Bracket) -> str: 998 if self.dialect.version >= Version("1.2"): 999 return super().bracket_sql(expression) 1000 1001 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1002 this = expression.this 1003 if isinstance(this, exp.Array): 1004 this.replace(exp.paren(this)) 1005 1006 bracket = super().bracket_sql(expression) 1007 1008 if not expression.args.get("returns_list_for_maps"): 1009 if not this.type: 1010 from sqlglot.optimizer.annotate_types import annotate_types 1011 1012 this = annotate_types(this, dialect=self.dialect) 1013 1014 if this.is_type(exp.DataType.Type.MAP): 1015 bracket = f"({bracket})[1]" 1016 1017 return bracket 1018 1019 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1020 expression_sql = self.sql(expression, "expression") 1021 1022 func = expression.this 1023 if isinstance(func, exp.PERCENTILES): 1024 # Make the order key the first arg and slide the fraction to the right 1025 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1026 order_col = expression.find(exp.Ordered) 1027 if order_col: 1028 func.set("expression", func.this) 1029 func.set("this", order_col.this) 1030 1031 this = self.sql(expression, "this").rstrip(")") 1032 1033 return f"{this}{expression_sql})" 1034 1035 def length_sql(self, expression: exp.Length) -> str: 1036 arg = expression.this 1037 1038 # Dialects like BQ and Snowflake also accept binary values as args, so 1039 # DDB will attempt to infer the type or resort to case/when resolution 1040 if not expression.args.get("binary") or arg.is_string: 1041 return self.func("LENGTH", arg) 1042 1043 if not arg.type: 1044 from sqlglot.optimizer.annotate_types import annotate_types 1045 1046 arg = annotate_types(arg, dialect=self.dialect) 1047 1048 if arg.is_type(*exp.DataType.TEXT_TYPES): 1049 return self.func("LENGTH", arg) 1050 1051 # We need these casts to make duckdb's static type checker happy 1052 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1053 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1054 1055 case = ( 1056 exp.case(self.func("TYPEOF", arg)) 1057 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1058 .else_( 1059 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1060 ) # anonymous to break length_sql recursion 1061 ) 1062 1063 return self.sql(case) 1064 1065 def objectinsert_sql(self, expression: 
exp.ObjectInsert) -> str: 1066 this = expression.this 1067 key = expression.args.get("key") 1068 key_sql = key.name if isinstance(key, exp.Expression) else "" 1069 value_sql = self.sql(expression, "value") 1070 1071 kv_sql = f"{key_sql} := {value_sql}" 1072 1073 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1074 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1075 if isinstance(this, exp.Struct) and not this.expressions: 1076 return self.func("STRUCT_PACK", kv_sql) 1077 1078 return self.func("STRUCT_INSERT", this, kv_sql) 1079 1080 def unnest_sql(self, expression: exp.Unnest) -> str: 1081 explode_array = expression.args.get("explode_array") 1082 if explode_array: 1083 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1084 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1085 expression.expressions.append( 1086 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1087 ) 1088 1089 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1090 alias = expression.args.get("alias") 1091 if alias: 1092 expression.set("alias", None) 1093 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1094 1095 unnest_sql = super().unnest_sql(expression) 1096 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1097 return self.sql(select) 1098 1099 return super().unnest_sql(expression) 1100 1101 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1102 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 1103 # DuckDB should render IGNORE NULLS only for the general-purpose 1104 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1105 return super().ignorenulls_sql(expression) 1106 1107 return self.sql(expression, "this") 1108 1109 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1110 this = self.sql(expression, "this") 1111 null_text = self.sql(expression, "null") 1112 1113 if null_text: 1114 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1115 1116 return self.func("ARRAY_TO_STRING", this, expression.expression) 1117 1118 @unsupported_args("position", "occurrence") 1119 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1120 group = expression.args.get("group") 1121 params = expression.args.get("parameters") 1122 1123 # Do not render group if there is no following argument, 1124 # and it's the default value for this dialect 1125 if ( 1126 not params 1127 and group 1128 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1129 ): 1130 group = None 1131 return self.func( 1132 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1133 ) 1134 1135 @unsupported_args("culture") 1136 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1137 fmt = expression.args.get("format") 1138 if fmt and fmt.is_int: 1139 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1140 1141 self.unsupported("Only integer formats are supported by NumberToStr") 1142 return self.function_fallback_sql(expression) 1143 1144 def autoincrementcolumnconstraint_sql(self, _) -> str: 1145 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1146 return ""
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
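A small usage sketch: options such as pretty and identify are forwarded from transpile to the Generator:

```python
import sqlglot

# Pretty-print and force-quote identifiers in the DuckDB output
sql = "SELECT a, b FROM t WHERE a > 1"
print(sqlglot.transpile(sql, write="duckdb", pretty=True, identify=True)[0])
```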
895 def strtodate_sql(self, expression: exp.StrToDate) -> str: 896 if expression.args.get("safe"): 897 formatted_time = self.format_time(expression) 898 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 899 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
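For example, a safe conversion (here assumed to originate from Snowflake's TRY_TO_DATE, which sqlglot is assumed to model as StrToDate with safe=True) takes the TRY_STRPTIME branch:

```python
import sqlglot

# Assumed mapping: TRY_TO_DATE -> StrToDate(safe=True) -> CAST(TRY_STRPTIME(...) AS DATE)
sql = "SELECT TRY_TO_DATE(s, 'yyyy-mm-dd')"
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
```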
907 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 908 nano = expression.args.get("nano") 909 if nano is not None: 910 expression.set( 911 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 912 ) 913 914 return rename_func("MAKE_TIME")(self, expression)
916 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 917 sec = expression.args["sec"] 918 919 milli = expression.args.get("milli") 920 if milli is not None: 921 sec += milli.pop() / exp.Literal.number(1000.0) 922 923 nano = expression.args.get("nano") 924 if nano is not None: 925 sec += nano.pop() / exp.Literal.number(1000000000.0) 926 927 if milli or nano: 928 expression.set("sec", sec) 929 930 return rename_func("MAKE_TIMESTAMP")(self, expression)
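Both *_fromparts methods fold sub-second arguments into the seconds operand before renaming to MAKE_TIME/MAKE_TIMESTAMP. A direct-construction sketch (argument names assumed from exp.TimestampFromParts):

```python
from sqlglot import exp

num = exp.Literal.number
node = exp.TimestampFromParts(
    year=num(2024), month=num(1), day=num(2),
    hour=num(3), min=num(4), sec=num(5), milli=num(500),
)
# The milli arg is popped and folded into the seconds operand, roughly
# MAKE_TIMESTAMP(2024, 1, 2, 3, 4, 5 + 500 / 1000.0)
print(node.sql("duckdb"))
```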
932 def tablesample_sql( 933 self, 934 expression: exp.TableSample, 935 tablesample_keyword: t.Optional[str] = None, 936 ) -> str: 937 if not isinstance(expression.parent, exp.Select): 938 # This sample clause only applies to a single source, not the entire resulting relation 939 tablesample_keyword = "TABLESAMPLE" 940 941 if expression.args.get("size"): 942 method = expression.args.get("method") 943 if method and method.name.upper() != "RESERVOIR": 944 self.unsupported( 945 f"Sampling method {method} is not supported with a discrete sample count, " 946 "defaulting to reservoir sampling" 947 ) 948 expression.set("method", exp.var("RESERVOIR")) 949 950 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
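A hedged example (Snowflake's row-count sampling syntax assumed): a non-RESERVOIR method combined with a discrete row count triggers the warning and the method rewrite:

```python
import sqlglot

# BERNOULLI + a discrete row count is rewritten to RESERVOIR sampling
sql = "SELECT * FROM t SAMPLE BERNOULLI (100 ROWS)"
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
```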
952 def interval_sql(self, expression: exp.Interval) -> str: 953 multiplier: t.Optional[int] = None 954 unit = expression.text("unit").lower() 955 956 if unit.startswith("week"): 957 multiplier = 7 958 if unit.startswith("quarter"): 959 multiplier = 90 960 961 if multiplier: 962 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 963 964 return super().interval_sql(expression)
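For instance, WEEK and QUARTER intervals are emitted as day multiples (7 and 90 days, respectively):

```python
import sqlglot

# Expected shape: SELECT d + (7 * INTERVAL '2' DAY); exact quoting may vary
print(sqlglot.parse_one("SELECT d + INTERVAL 2 WEEK", read="duckdb").sql("duckdb"))
```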
971 def join_sql(self, expression: exp.Join) -> str: 972 if ( 973 expression.side == "LEFT" 974 and not expression.args.get("on") 975 and isinstance(expression.this, exp.Unnest) 976 ): 977 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 978 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 979 return super().join_sql(expression.on(exp.true())) 980 981 return super().join_sql(expression)
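A sketch with a BigQuery-style source, where LEFT JOIN UNNEST needs no ON clause:

```python
import sqlglot

# The unconditioned LEFT JOIN UNNEST gains an always-true ON clause in DuckDB
sql = "SELECT * FROM t LEFT JOIN UNNEST(t.arr) AS x"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
```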
997 def bracket_sql(self, expression: exp.Bracket) -> str: 998 if self.dialect.version >= Version("1.2"): 999 return super().bracket_sql(expression) 1000 1001 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1002 this = expression.this 1003 if isinstance(this, exp.Array): 1004 this.replace(exp.paren(this)) 1005 1006 bracket = super().bracket_sql(expression) 1007 1008 if not expression.args.get("returns_list_for_maps"): 1009 if not this.type: 1010 from sqlglot.optimizer.annotate_types import annotate_types 1011 1012 this = annotate_types(this, dialect=self.dialect) 1013 1014 if this.is_type(exp.DataType.Type.MAP): 1015 bracket = f"({bracket})[1]" 1016 1017 return bracket
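bracket_sql (like countif_sql above it) gates on the target DuckDB version. A sketch, assuming the version setting is accepted in a dialect string:

```python
import sqlglot

# Pre-1.2 targets parenthesize array literals before subscripting them
sql = "SELECT [1, 2, 3][1]"
print(sqlglot.transpile(sql, read="duckdb", write="duckdb, version=1.1")[0])
print(sqlglot.transpile(sql, read="duckdb", write="duckdb")[0])
```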
1019 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1020 expression_sql = self.sql(expression, "expression") 1021 1022 func = expression.this 1023 if isinstance(func, exp.PERCENTILES): 1024 # Make the order key the first arg and slide the fraction to the right 1025 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1026 order_col = expression.find(exp.Ordered) 1027 if order_col: 1028 func.set("expression", func.this) 1029 func.set("this", order_col.this) 1030 1031 this = self.sql(expression, "this").rstrip(")") 1032 1033 return f"{this}{expression_sql})"
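For example, an ordered-set aggregate from PostgreSQL is rearranged into DuckDB's two-argument quantile form:

```python
import sqlglot

# Expected shape: QUANTILE_CONT(x, 0.5); the order key becomes the first arg
sql = "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) FROM t"
print(sqlglot.transpile(sql, read="postgres", write="duckdb")[0])
```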
1035 def length_sql(self, expression: exp.Length) -> str: 1036 arg = expression.this 1037 1038 # Dialects like BQ and Snowflake also accept binary values as args, so 1039 # DDB will attempt to infer the type or resort to case/when resolution 1040 if not expression.args.get("binary") or arg.is_string: 1041 return self.func("LENGTH", arg) 1042 1043 if not arg.type: 1044 from sqlglot.optimizer.annotate_types import annotate_types 1045 1046 arg = annotate_types(arg, dialect=self.dialect) 1047 1048 if arg.is_type(*exp.DataType.TEXT_TYPES): 1049 return self.func("LENGTH", arg) 1050 1051 # We need these casts to make duckdb's static type checker happy 1052 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1053 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1054 1055 case = ( 1056 exp.case(self.func("TYPEOF", arg)) 1057 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1058 .else_( 1059 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1060 ) # anonymous to break length_sql recursion 1061 ) 1062 1063 return self.sql(case)
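A hedged example (BigQuery's LENGTH is assumed to parse with the binary flag set): an untyped column falls back to the TYPEOF-based CASE, while a string literal short-circuits to plain LENGTH:

```python
import sqlglot

print(sqlglot.transpile("SELECT LENGTH(col)", read="bigquery", write="duckdb")[0])
print(sqlglot.transpile("SELECT LENGTH('abc')", read="bigquery", write="duckdb")[0])
```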
1065 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1066 this = expression.this 1067 key = expression.args.get("key") 1068 key_sql = key.name if isinstance(key, exp.Expression) else "" 1069 value_sql = self.sql(expression, "value") 1070 1071 kv_sql = f"{key_sql} := {value_sql}" 1072 1073 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1074 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1075 if isinstance(this, exp.Struct) and not this.expressions: 1076 return self.func("STRUCT_PACK", kv_sql) 1077 1078 return self.func("STRUCT_INSERT", this, kv_sql)
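The empty-struct special case mentioned in the comment, end to end:

```python
import sqlglot

# An empty input struct yields STRUCT_PACK(k := v) instead of STRUCT_INSERT
sql = "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1)"
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
```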
1080 def unnest_sql(self, expression: exp.Unnest) -> str: 1081 explode_array = expression.args.get("explode_array") 1082 if explode_array: 1083 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1084 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1085 expression.expressions.append( 1086 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1087 ) 1088 1089 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1090 alias = expression.args.get("alias") 1091 if alias: 1092 expression.set("alias", None) 1093 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1094 1095 unnest_sql = super().unnest_sql(expression) 1096 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1097 return self.sql(select) 1098 1099 return super().unnest_sql(expression)
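A direct-construction sketch (the explode_array flag is assumed to be set by the BigQuery reader for nested arrays):

```python
from sqlglot import exp

# The flag appends max_depth => 2 and wraps the UNNEST in a subquery
unnest = exp.Unnest(expressions=[exp.column("arr")], explode_array=True)
print(exp.select("*").from_(unnest).sql("duckdb"))
```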
def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
    if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
        # DuckDB should render IGNORE NULLS only for the general-purpose
        # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
        return super().ignorenulls_sql(expression)

    return self.sql(expression, "this")
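For instance (approximate output), the modifier is silently dropped when the wrapped function is not in WINDOW_FUNCS_WITH_IGNORE_NULLS.

import sqlglot

# Kept for window functions, dropped for e.g. BigQuery's ANY_VALUE aggregate.
print(sqlglot.transpile(
    "SELECT ANY_VALUE(x IGNORE NULLS) FROM t", read="bigquery", write="duckdb"
)[0])
# Expected shape: SELECT ANY_VALUE(x) FROM t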
def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
    this = self.sql(expression, "this")
    null_text = self.sql(expression, "null")

    if null_text:
        this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

    return self.func("ARRAY_TO_STRING", this, expression.expression)
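Since DuckDB's ARRAY_TO_STRING has no NULL-replacement argument, the third BigQuery argument is emulated per the code above; a rough example.

import sqlglot

sql = "SELECT ARRAY_TO_STRING(a, ',', '-') FROM t"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
# Expected shape:
# SELECT ARRAY_TO_STRING(LIST_TRANSFORM(a, x -> COALESCE(x, '-')), ',') FROM t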
@unsupported_args("position", "occurrence")
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
    group = expression.args.get("group")
    params = expression.args.get("parameters")

    # Do not render group if there is no following argument,
    # and it's the default value for this dialect
    if (
        not params
        and group
        and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
    ):
        group = None
    return self.func(
        "REGEXP_EXTRACT", expression.this, expression.expression, group, params
    )
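A small sketch, assuming DuckDB's REGEXP_EXTRACT_DEFAULT_GROUP is 0 (output approximate).

import sqlglot

# An explicit default group with no trailing parameters is redundant and dropped.
print(sqlglot.transpile(
    "SELECT REGEXP_EXTRACT(s, 'a(b)', 0)", read="duckdb", write="duckdb"
)[0])
# Expected shape: SELECT REGEXP_EXTRACT(s, 'a(b)')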
@unsupported_args("culture")
def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
    fmt = expression.args.get("format")
    if fmt and fmt.is_int:
        return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

    self.unsupported("Only integer formats are supported by NumberToStr")
    return self.function_fallback_sql(expression)
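For example, MySQL's FORMAT(number, decimals) parses to exp.NumberToStr, which DuckDB renders with a fmt-style FORMAT string (output shape assumed).

import sqlglot

print(sqlglot.transpile(
    "SELECT FORMAT(12332.123456, 4)", read="mysql", write="duckdb"
)[0])
# Expected shape: SELECT FORMAT('{:,.4f}', 12332.123456)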
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- put_sql