sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    Version,
    approx_count_distinct_sql,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    count_if_to_sum,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    remove_from_array_using_filter,
    strposition_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    expr = expression.expression
    interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit)

    return f"{self.sql(this)} {op} {self.sql(interval)}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], exp.Show]:
    def _parse(self: DuckDB.Parser) -> exp.Show:
        return self._parse_show_duckdb(*args, **kwargs)

    return _parse


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    #  1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    #  2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
    return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "DATETIME": TokenType.TIMESTAMPNTZ,
            "DETACH": TokenType.DETACH,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP": TokenType.TIMESTAMPNTZ,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        SHOW_PARSERS = {
            "TABLES": _show_parser("TABLES"),
            "ALL TABLES": _show_parser("ALL TABLES"),
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "GENERATE_SERIES": _build_generate_series(),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "RANGE": _build_generate_series(end_exclusive=True),
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TIME_BUCKET": exp.DateBin.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            **dict.fromkeys(
                ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
            ),
        }
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
            "@": lambda self: exp.Abs(this=self._parse_bitwise()),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ATTACH: lambda self: self._parse_attach_detach(),
            TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. foo: 1
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
                self._match(TokenType.COLON)
                comments = self._prev_comments or []

                this = self._parse_assignment()
                if isinstance(this, exp.Expression):
                    # Moves the comment next to the alias in `alias: expr /* comment */`
                    comments += this.pop_comments() or []

                return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            return super()._parse_expression()

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. FROM foo: bar
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                )
                self._match(TokenType.COLON)
                comments = self._prev_comments or []
            else:
                alias = None
                comments = []

            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
                # Moves the comment next to the alias in `alias: table /* comment */`
                comments += table.pop_comments() or []
                alias.comments = alias.pop_comments() + comments
                table.set("alias", alias)

            return table

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
                # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

        def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )

        def _parse_show_duckdb(self, this: str) -> exp.Show:
            return self.expression(exp.Show, this=this)

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        SUPPORTS_WINDOW_EXCLUDE = True
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArrayRemove: remove_from_array_using_filter,
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpILike: lambda self, e: self.func(
                "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
            ),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: strposition_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
            exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.DateBin: rename_func("TIME_BUCKET"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.JSONB: "JSON",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
            exp.FirstValue,
            exp.Lag,
            exp.LastValue,
            exp.Lead,
            exp.NthValue,
        )

        def show_sql(self, expression: exp.Show) -> str:
            return f"SHOW {expression.name}"

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def countif_sql(self, expression: exp.CountIf) -> str:
            if self.dialect.version >= Version("1.2"):
                return self.function_fallback_sql(expression)

            # https://github.com/tobymao/sqlglot/pull/4749
            return count_if_to_sum(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if self.dialect.version >= Version("1.2"):
                return super().bracket_sql(expression)

            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this, dialect=self.dialect)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
                .else_(
                    exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            self.unsupported("IGNORE NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render RESPECT NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
                return super().respectnulls_sql(expression)

            self.unsupported("RESPECT NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)

        def autoincrementcolumnconstraint_sql(self, _) -> str:
            self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
            return ""
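
Usage sketch (not part of the module source above): the dialect is exercised through sqlglot's public transpile API, which drives the Parser and Generator defined in this module. The queries below are illustrative assumptions chosen to hit helpers from this file; the exact rendered output may vary slightly between sqlglot versions.

    import sqlglot

    # BigQuery's GENERATE_DATE_ARRAY is handled by _generate_datetime_array_sql:
    # it should come out as a GENERATE_SERIES call cast back to a DATE array.
    print(
        sqlglot.transpile(
            "SELECT GENERATE_DATE_ARRAY('2020-01-01', '2020-01-05', INTERVAL 1 DAY)",
            read="bigquery",
            write="duckdb",
        )[0]
    )

    # JSON extraction is rendered with DuckDB's arrow operators via
    # _arrow_json_extract_sql, e.g. roughly: SELECT col -> '$.a.b'
    print(
        sqlglot.transpile("SELECT JSON_EXTRACT(col, '$.a.b')", read="sqlite", write="duckdb")[0]
    )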
return super().interval_sql(expression) 969 970 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 971 if isinstance(expression.parent, exp.UserDefinedFunction): 972 return self.sql(expression, "this") 973 return super().columndef_sql(expression, sep) 974 975 def join_sql(self, expression: exp.Join) -> str: 976 if ( 977 expression.side == "LEFT" 978 and not expression.args.get("on") 979 and isinstance(expression.this, exp.Unnest) 980 ): 981 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 982 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 983 return super().join_sql(expression.on(exp.true())) 984 985 return super().join_sql(expression) 986 987 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 988 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 989 if expression.args.get("is_end_exclusive"): 990 return rename_func("RANGE")(self, expression) 991 992 return self.function_fallback_sql(expression) 993 994 def countif_sql(self, expression: exp.CountIf) -> str: 995 if self.dialect.version >= Version("1.2"): 996 return self.function_fallback_sql(expression) 997 998 # https://github.com/tobymao/sqlglot/pull/4749 999 return count_if_to_sum(self, expression) 1000 1001 def bracket_sql(self, expression: exp.Bracket) -> str: 1002 if self.dialect.version >= Version("1.2"): 1003 return super().bracket_sql(expression) 1004 1005 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1006 this = expression.this 1007 if isinstance(this, exp.Array): 1008 this.replace(exp.paren(this)) 1009 1010 bracket = super().bracket_sql(expression) 1011 1012 if not expression.args.get("returns_list_for_maps"): 1013 if not this.type: 1014 from sqlglot.optimizer.annotate_types import annotate_types 1015 1016 this = annotate_types(this, dialect=self.dialect) 1017 1018 if this.is_type(exp.DataType.Type.MAP): 1019 bracket = f"({bracket})[1]" 1020 1021 return bracket 1022 1023 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1024 expression_sql = self.sql(expression, "expression") 1025 1026 func = expression.this 1027 if isinstance(func, exp.PERCENTILES): 1028 # Make the order key the first arg and slide the fraction to the right 1029 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1030 order_col = expression.find(exp.Ordered) 1031 if order_col: 1032 func.set("expression", func.this) 1033 func.set("this", order_col.this) 1034 1035 this = self.sql(expression, "this").rstrip(")") 1036 1037 return f"{this}{expression_sql})" 1038 1039 def length_sql(self, expression: exp.Length) -> str: 1040 arg = expression.this 1041 1042 # Dialects like BQ and Snowflake also accept binary values as args, so 1043 # DDB will attempt to infer the type or resort to case/when resolution 1044 if not expression.args.get("binary") or arg.is_string: 1045 return self.func("LENGTH", arg) 1046 1047 if not arg.type: 1048 from sqlglot.optimizer.annotate_types import annotate_types 1049 1050 arg = annotate_types(arg, dialect=self.dialect) 1051 1052 if arg.is_type(*exp.DataType.TEXT_TYPES): 1053 return self.func("LENGTH", arg) 1054 1055 # We need these casts to make duckdb's static type checker happy 1056 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1057 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1058 1059 case = ( 1060 exp.case(self.func("TYPEOF", arg)) 1061 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1062 .else_( 1063 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1064 ) # 
anonymous to break length_sql recursion 1065 ) 1066 1067 return self.sql(case) 1068 1069 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1070 this = expression.this 1071 key = expression.args.get("key") 1072 key_sql = key.name if isinstance(key, exp.Expression) else "" 1073 value_sql = self.sql(expression, "value") 1074 1075 kv_sql = f"{key_sql} := {value_sql}" 1076 1077 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1078 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1079 if isinstance(this, exp.Struct) and not this.expressions: 1080 return self.func("STRUCT_PACK", kv_sql) 1081 1082 return self.func("STRUCT_INSERT", this, kv_sql) 1083 1084 def unnest_sql(self, expression: exp.Unnest) -> str: 1085 explode_array = expression.args.get("explode_array") 1086 if explode_array: 1087 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1088 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1089 expression.expressions.append( 1090 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1091 ) 1092 1093 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1094 alias = expression.args.get("alias") 1095 if alias: 1096 expression.set("alias", None) 1097 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1098 1099 unnest_sql = super().unnest_sql(expression) 1100 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1101 return self.sql(select) 1102 1103 return super().unnest_sql(expression) 1104 1105 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1106 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1107 # DuckDB should render IGNORE NULLS only for the general-purpose 1108 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1109 return super().ignorenulls_sql(expression) 1110 1111 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1112 return self.sql(expression, "this") 1113 1114 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1115 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1116 # DuckDB should render RESPECT NULLS only for the general-purpose 1117 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 
1118 return super().respectnulls_sql(expression) 1119 1120 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1121 return self.sql(expression, "this") 1122 1123 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1124 this = self.sql(expression, "this") 1125 null_text = self.sql(expression, "null") 1126 1127 if null_text: 1128 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1129 1130 return self.func("ARRAY_TO_STRING", this, expression.expression) 1131 1132 @unsupported_args("position", "occurrence") 1133 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1134 group = expression.args.get("group") 1135 params = expression.args.get("parameters") 1136 1137 # Do not render group if there is no following argument, 1138 # and it's the default value for this dialect 1139 if ( 1140 not params 1141 and group 1142 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1143 ): 1144 group = None 1145 return self.func( 1146 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1147 ) 1148 1149 @unsupported_args("culture") 1150 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1151 fmt = expression.args.get("format") 1152 if fmt and fmt.is_int: 1153 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1154 1155 self.unsupported("Only integer formats are supported by NumberToStr") 1156 return self.function_fallback_sql(expression) 1157 1158 def autoincrementcolumnconstraint_sql(self, _) -> str: 1159 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1160 return ""
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, as in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator (see the sketch after these settings).
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether number literals can include underscores for better readability
Specifies the strategy according to which identifiers should be normalized.
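For instance, the fixed-size array setting above can be observed with a simple round-trip; a minimal sketch (the exact output shape is assumed):

```python
import sqlglot

# Fixed-size array casts survive a DuckDB round-trip; in Snowflake the same
# suffix would instead be read as a subscript. Output shape is assumed.
print(sqlglot.transpile("SELECT x::INT[5] FROM t", read="duckdb", write="duckdb")[0])
# e.g. SELECT CAST(x AS INT[5]) FROM t
```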
294 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 295 if isinstance(path, exp.Literal): 296 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 297 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 298 # This check ensures we'll avoid trying to parse these as JSON paths, which can 299 # either result in a noisy warning or in an invalid representation of the path. 300 path_text = path.name 301 if path_text.startswith("/") or "[#" in path_text: 302 return path 303 304 return super().to_json_path(path)
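A minimal sketch of the distinction this method draws (behavior assumed from the comments above):

```python
import sqlglot

# A leading "/" marks DuckDB's JSON pointer syntax, which to_json_path leaves
# as a plain string literal; a "$."-style path is parsed into a JSONPath tree.
# Both forms are assumed to round-trip unchanged.
for path in ("'/a/0'", "'$.a'"):
    print(sqlglot.transpile(f"SELECT j -> {path} FROM t", read="duckdb", write="duckdb")[0])
```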
306 class Tokenizer(tokens.Tokenizer): 307 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 308 HEREDOC_STRINGS = ["$"] 309 310 HEREDOC_TAG_IS_IDENTIFIER = True 311 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 312 313 KEYWORDS = { 314 **tokens.Tokenizer.KEYWORDS, 315 "//": TokenType.DIV, 316 "**": TokenType.DSTAR, 317 "^@": TokenType.CARET_AT, 318 "@>": TokenType.AT_GT, 319 "<@": TokenType.LT_AT, 320 "ATTACH": TokenType.ATTACH, 321 "BINARY": TokenType.VARBINARY, 322 "BITSTRING": TokenType.BIT, 323 "BPCHAR": TokenType.TEXT, 324 "CHAR": TokenType.TEXT, 325 "DATETIME": TokenType.TIMESTAMPNTZ, 326 "DETACH": TokenType.DETACH, 327 "EXCLUDE": TokenType.EXCEPT, 328 "LOGICAL": TokenType.BOOLEAN, 329 "ONLY": TokenType.ONLY, 330 "PIVOT_WIDER": TokenType.PIVOT, 331 "POSITIONAL": TokenType.POSITIONAL, 332 "SIGNED": TokenType.INT, 333 "STRING": TokenType.TEXT, 334 "SUMMARIZE": TokenType.SUMMARIZE, 335 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 336 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 337 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 338 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 339 "TIMESTAMP_US": TokenType.TIMESTAMP, 340 "UBIGINT": TokenType.UBIGINT, 341 "UINTEGER": TokenType.UINT, 342 "USMALLINT": TokenType.USMALLINT, 343 "UTINYINT": TokenType.UTINYINT, 344 "VARCHAR": TokenType.TEXT, 345 } 346 KEYWORDS.pop("/*+") 347 348 SINGLE_TOKENS = { 349 **tokens.Tokenizer.SINGLE_TOKENS, 350 "$": TokenType.PARAMETER, 351 } 352 353 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
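One consequence of the keyword table above, sketched with the public API:

```python
import sqlglot
from sqlglot import exp

# "//" is tokenized as TokenType.DIV, so DuckDB's integer division parses
# into an exp.IntDiv node rather than a plain exp.Div.
tree = sqlglot.parse_one("SELECT 7 // 2", read="duckdb")
assert isinstance(tree.selects[0], exp.IntDiv)
```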
355 class Parser(parser.Parser): 356 BITWISE = { 357 **parser.Parser.BITWISE, 358 TokenType.TILDA: exp.RegexpLike, 359 } 360 BITWISE.pop(TokenType.CARET) 361 362 RANGE_PARSERS = { 363 **parser.Parser.RANGE_PARSERS, 364 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 365 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 366 } 367 368 EXPONENT = { 369 **parser.Parser.EXPONENT, 370 TokenType.CARET: exp.Pow, 371 TokenType.DSTAR: exp.Pow, 372 } 373 374 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 375 376 SHOW_PARSERS = { 377 "TABLES": _show_parser("TABLES"), 378 "ALL TABLES": _show_parser("ALL TABLES"), 379 } 380 381 FUNCTIONS = { 382 **parser.Parser.FUNCTIONS, 383 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 384 "ARRAY_SORT": exp.SortArray.from_arg_list, 385 "DATEDIFF": _build_date_diff, 386 "DATE_DIFF": _build_date_diff, 387 "DATE_TRUNC": date_trunc_to_time, 388 "DATETRUNC": date_trunc_to_time, 389 "DECODE": lambda args: exp.Decode( 390 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 391 ), 392 "EDITDIST3": exp.Levenshtein.from_arg_list, 393 "ENCODE": lambda args: exp.Encode( 394 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 395 ), 396 "EPOCH": exp.TimeToUnix.from_arg_list, 397 "EPOCH_MS": lambda args: exp.UnixToTime( 398 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 399 ), 400 "GENERATE_SERIES": _build_generate_series(), 401 "JSON": exp.ParseJSON.from_arg_list, 402 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 403 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 404 "LIST_HAS": exp.ArrayContains.from_arg_list, 405 "LIST_REVERSE_SORT": _build_sort_array_desc, 406 "LIST_SORT": exp.SortArray.from_arg_list, 407 "LIST_VALUE": lambda args: exp.Array(expressions=args), 408 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 409 "MAKE_TIMESTAMP": _build_make_timestamp, 410 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 411 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 412 "RANGE": _build_generate_series(end_exclusive=True), 413 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 414 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 415 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 416 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 417 this=seq_get(args, 0), 418 expression=seq_get(args, 1), 419 replacement=seq_get(args, 2), 420 modifiers=seq_get(args, 3), 421 ), 422 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 423 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 424 "STRING_SPLIT": exp.Split.from_arg_list, 425 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 426 "STRING_TO_ARRAY": exp.Split.from_arg_list, 427 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 428 "STRUCT_PACK": exp.Struct.from_arg_list, 429 "STR_SPLIT": exp.Split.from_arg_list, 430 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 431 "TIME_BUCKET": exp.DateBin.from_arg_list, 432 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 433 "UNNEST": exp.Explode.from_arg_list, 434 "XOR": binary_from_function(exp.BitwiseXor), 435 } 436 437 FUNCTIONS.pop("DATE_SUB") 438 FUNCTIONS.pop("GLOB") 439 440 FUNCTION_PARSERS = { 441 **parser.Parser.FUNCTION_PARSERS, 442 **dict.fromkeys( 443 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 444 ), 445 } 446 FUNCTION_PARSERS.pop("DECODE") 447 448 NO_PAREN_FUNCTION_PARSERS = { 449 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 450 "MAP": 
lambda self: self._parse_map(), 451 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 452 } 453 454 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 455 TokenType.SEMI, 456 TokenType.ANTI, 457 } 458 459 PLACEHOLDER_PARSERS = { 460 **parser.Parser.PLACEHOLDER_PARSERS, 461 TokenType.PARAMETER: lambda self: ( 462 self.expression(exp.Placeholder, this=self._prev.text) 463 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 464 else None 465 ), 466 } 467 468 TYPE_CONVERTERS = { 469 # https://duckdb.org/docs/sql/data_types/numeric 470 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 471 # https://duckdb.org/docs/sql/data_types/text 472 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 473 } 474 475 STATEMENT_PARSERS = { 476 **parser.Parser.STATEMENT_PARSERS, 477 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 478 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 479 TokenType.SHOW: lambda self: self._parse_show(), 480 } 481 482 def _parse_expression(self) -> t.Optional[exp.Expression]: 483 # DuckDB supports prefix aliases, e.g. foo: 1 484 if self._next and self._next.token_type == TokenType.COLON: 485 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 486 self._match(TokenType.COLON) 487 comments = self._prev_comments or [] 488 489 this = self._parse_assignment() 490 if isinstance(this, exp.Expression): 491 # Moves the comment next to the alias in `alias: expr /* comment */` 492 comments += this.pop_comments() or [] 493 494 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 495 496 return super()._parse_expression() 497 498 def _parse_table( 499 self, 500 schema: bool = False, 501 joins: bool = False, 502 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 503 parse_bracket: bool = False, 504 is_db_reference: bool = False, 505 parse_partition: bool = False, 506 ) -> t.Optional[exp.Expression]: 507 # DuckDB supports prefix aliases, e.g. 
FROM foo: bar 508 if self._next and self._next.token_type == TokenType.COLON: 509 alias = self._parse_table_alias( 510 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 511 ) 512 self._match(TokenType.COLON) 513 comments = self._prev_comments or [] 514 else: 515 alias = None 516 comments = [] 517 518 table = super()._parse_table( 519 schema=schema, 520 joins=joins, 521 alias_tokens=alias_tokens, 522 parse_bracket=parse_bracket, 523 is_db_reference=is_db_reference, 524 parse_partition=parse_partition, 525 ) 526 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 527 # Moves the comment next to the alias in `alias: table /* comment */` 528 comments += table.pop_comments() or [] 529 alias.comments = alias.pop_comments() + comments 530 table.set("alias", alias) 531 532 return table 533 534 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 535 # https://duckdb.org/docs/sql/samples.html 536 sample = super()._parse_table_sample(as_modifier=as_modifier) 537 if sample and not sample.args.get("method"): 538 if sample.args.get("size"): 539 sample.set("method", exp.var("RESERVOIR")) 540 else: 541 sample.set("method", exp.var("SYSTEM")) 542 543 return sample 544 545 def _parse_bracket( 546 self, this: t.Optional[exp.Expression] = None 547 ) -> t.Optional[exp.Expression]: 548 bracket = super()._parse_bracket(this) 549 550 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 551 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 552 bracket.set("returns_list_for_maps", True) 553 554 return bracket 555 556 def _parse_map(self) -> exp.ToMap | exp.Map: 557 if self._match(TokenType.L_BRACE, advance=False): 558 return self.expression(exp.ToMap, this=self._parse_bracket()) 559 560 args = self._parse_wrapped_csv(self._parse_assignment) 561 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 562 563 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 564 return self._parse_field_def() 565 566 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 567 if len(aggregations) == 1: 568 return super()._pivot_column_names(aggregations) 569 return pivot_column_names(aggregations, dialect="duckdb") 570 571 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 572 def _parse_attach_option() -> exp.AttachOption: 573 return self.expression( 574 exp.AttachOption, 575 this=self._parse_var(any_token=True), 576 expression=self._parse_field(any_token=True), 577 ) 578 579 self._match(TokenType.DATABASE) 580 exists = self._parse_exists(not_=is_attach) 581 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 582 583 if self._match(TokenType.L_PAREN, advance=False): 584 expressions = self._parse_wrapped_csv(_parse_attach_option) 585 else: 586 expressions = None 587 588 return ( 589 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 590 if is_attach 591 else self.expression(exp.Detach, this=this, exists=exists) 592 ) 593 594 def _parse_show_duckdb(self, this: str) -> exp.Show: 595 return self.expression(exp.Show, this=this)
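The prefix-alias handling in _parse_expression and _parse_table can be exercised as follows; a sketch, with the normalized output assumed:

```python
import sqlglot

# DuckDB prefix aliases ("alias: expr" and "FROM alias: table") are parsed
# into ordinary exp.Alias / exp.TableAlias nodes.
print(sqlglot.transpile("SELECT greeting: 'hello' FROM t: my_table", read="duckdb")[0])
# assumed: SELECT 'hello' AS greeting FROM my_table AS t
```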
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
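These arguments are assumed to be forwarded by the top-level helpers; a minimal sketch:

```python
import sqlglot
from sqlglot.errors import ErrorLevel

# Raise on parse errors instead of collecting them (error_level is assumed to
# be forwarded from parse_one to the Parser).
sqlglot.parse_one("SELECT 1", read="duckdb", error_level=ErrorLevel.RAISE)
```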
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
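A minimal sketch of passing generator options through the top-level API:

```python
import sqlglot

# Pretty-print and force identifier quoting; both options are forwarded to the
# Generator by sqlglot.transpile.
print(sqlglot.transpile("SELECT a, b FROM t", write="duckdb", pretty=True, identify=True)[0])
```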
899 def strtodate_sql(self, expression: exp.StrToDate) -> str: 900 if expression.args.get("safe"): 901 formatted_time = self.format_time(expression) 902 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 903 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
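A sketch of the safe branch, building the node by hand (dialects such as BigQuery are assumed to produce StrToDate with safe=True):

```python
from sqlglot import exp

# A hand-built "safe" StrToDate takes the TRY_STRPTIME path above; this format
# string is assumed to map onto itself in DuckDB.
node = exp.StrToDate(this=exp.column("s"), format=exp.Literal.string("%Y-%m-%d"), safe=True)
print(node.sql(dialect="duckdb"))
# assumed: CAST(TRY_STRPTIME(s, '%Y-%m-%d') AS DATE)
```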
911 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 912 nano = expression.args.get("nano") 913 if nano is not None: 914 expression.set( 915 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 916 ) 917 918 return rename_func("MAKE_TIME")(self, expression)
920 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 921 sec = expression.args["sec"] 922 923 milli = expression.args.get("milli") 924 if milli is not None: 925 sec += milli.pop() / exp.Literal.number(1000.0) 926 927 nano = expression.args.get("nano") 928 if nano is not None: 929 sec += nano.pop() / exp.Literal.number(1000000000.0) 930 931 if milli or nano: 932 expression.set("sec", sec) 933 934 return rename_func("MAKE_TIMESTAMP")(self, expression)
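For example, transpiling a parts-based constructor (Snowflake's TIMESTAMP_FROM_PARTS is assumed to map to exp.TimestampFromParts):

```python
import sqlglot

# Seconds stay integral here; milli/nano parts, when present, are folded into
# the seconds argument as shown above.
print(sqlglot.transpile(
    "SELECT TIMESTAMP_FROM_PARTS(2024, 1, 2, 3, 4, 5)", read="snowflake", write="duckdb"
)[0])
# assumed: SELECT MAKE_TIMESTAMP(2024, 1, 2, 3, 4, 5)
```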
936 def tablesample_sql( 937 self, 938 expression: exp.TableSample, 939 tablesample_keyword: t.Optional[str] = None, 940 ) -> str: 941 if not isinstance(expression.parent, exp.Select): 942 # This sample clause only applies to a single source, not the entire resulting relation 943 tablesample_keyword = "TABLESAMPLE" 944 945 if expression.args.get("size"): 946 method = expression.args.get("method") 947 if method and method.name.upper() != "RESERVOIR": 948 self.unsupported( 949 f"Sampling method {method} is not supported with a discrete sample count, " 950 "defaulting to reservoir sampling" 951 ) 952 expression.set("method", exp.var("RESERVOIR")) 953 954 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
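A sketch of the reservoir fallback (input and output shapes assumed):

```python
import sqlglot

# A discrete row-count sample combined with a non-RESERVOIR method triggers the
# unsupported-warning path above and is rewritten to reservoir sampling.
print(sqlglot.transpile(
    "SELECT * FROM t USING SAMPLE SYSTEM (10 ROWS)", read="duckdb", write="duckdb"
)[0])
```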
956 def interval_sql(self, expression: exp.Interval) -> str: 957 multiplier: t.Optional[int] = None 958 unit = expression.text("unit").lower() 959 960 if unit.startswith("week"): 961 multiplier = 7 962 if unit.startswith("quarter"): 963 multiplier = 90 964 965 if multiplier: 966 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 967 968 return super().interval_sql(expression)
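For example (output shape assumed):

```python
import sqlglot

# WEEK/QUARTER intervals are rewritten as day multiples, per interval_sql above.
print(sqlglot.transpile("SELECT CURRENT_DATE + INTERVAL 2 WEEK", read="duckdb", write="duckdb")[0])
# assumed: SELECT CURRENT_DATE + (7 * INTERVAL 2 DAY)
```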
975 def join_sql(self, expression: exp.Join) -> str: 976 if ( 977 expression.side == "LEFT" 978 and not expression.args.get("on") 979 and isinstance(expression.this, exp.Unnest) 980 ): 981 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 982 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 983 return super().join_sql(expression.on(exp.true())) 984 985 return super().join_sql(expression)
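A sketch, assuming BigQuery as the source of the ON-less join:

```python
import sqlglot

# The generated DuckDB join is assumed to gain a dummy ON TRUE clause.
print(sqlglot.transpile(
    "SELECT * FROM t LEFT JOIN UNNEST(t.arr) AS a", read="bigquery", write="duckdb"
)[0])
```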
1001 def bracket_sql(self, expression: exp.Bracket) -> str: 1002 if self.dialect.version >= Version("1.2"): 1003 return super().bracket_sql(expression) 1004 1005 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1006 this = expression.this 1007 if isinstance(this, exp.Array): 1008 this.replace(exp.paren(this)) 1009 1010 bracket = super().bracket_sql(expression) 1011 1012 if not expression.args.get("returns_list_for_maps"): 1013 if not this.type: 1014 from sqlglot.optimizer.annotate_types import annotate_types 1015 1016 this = annotate_types(this, dialect=self.dialect) 1017 1018 if this.is_type(exp.DataType.Type.MAP): 1019 bracket = f"({bracket})[1]" 1020 1021 return bracket
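A sketch of the version gate; the `version` dialect setting syntax is an assumption:

```python
import sqlglot

# Targeting a pre-1.2 engine keeps the map-subscript shim: the bracket result
# is wrapped as (...)[1] when the subject is known to be a MAP.
sql = "SELECT CAST(m AS MAP(TEXT, INT))['k'] FROM t"
print(sqlglot.transpile(sql, read="duckdb", write="duckdb, version=1.0")[0])
# assumed: SELECT (CAST(m AS MAP(TEXT, INT))['k'])[1] FROM t
```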
1023 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1024 expression_sql = self.sql(expression, "expression") 1025 1026 func = expression.this 1027 if isinstance(func, exp.PERCENTILES): 1028 # Make the order key the first arg and slide the fraction to the right 1029 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1030 order_col = expression.find(exp.Ordered) 1031 if order_col: 1032 func.set("expression", func.this) 1033 func.set("this", order_col.this) 1034 1035 this = self.sql(expression, "this").rstrip(")") 1036 1037 return f"{this}{expression_sql})"
1039 def length_sql(self, expression: exp.Length) -> str: 1040 arg = expression.this 1041 1042 # Dialects like BQ and Snowflake also accept binary values as args, so 1043 # DDB will attempt to infer the type or resort to case/when resolution 1044 if not expression.args.get("binary") or arg.is_string: 1045 return self.func("LENGTH", arg) 1046 1047 if not arg.type: 1048 from sqlglot.optimizer.annotate_types import annotate_types 1049 1050 arg = annotate_types(arg, dialect=self.dialect) 1051 1052 if arg.is_type(*exp.DataType.TEXT_TYPES): 1053 return self.func("LENGTH", arg) 1054 1055 # We need these casts to make duckdb's static type checker happy 1056 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1057 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1058 1059 case = ( 1060 exp.case(self.func("TYPEOF", arg)) 1061 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1062 .else_( 1063 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1064 ) # anonymous to break length_sql recursion 1065 ) 1066 1067 return self.sql(case)
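A sketch of the fallback path (BigQuery's LENGTH is assumed to set binary=True):

```python
import sqlglot

# With an argument of unknown type, the generator falls back to the
# CASE TYPEOF(...) resolution shown above.
print(sqlglot.transpile("SELECT LENGTH(col) FROM t", read="bigquery", write="duckdb")[0])
```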
def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
    this = expression.this
    key = expression.args.get("key")
    key_sql = key.name if isinstance(key, exp.Expression) else ""
    value_sql = self.sql(expression, "value")

    kv_sql = f"{key_sql} := {value_sql}"

    # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
    # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
    if isinstance(this, exp.Struct) and not this.expressions:
        return self.func("STRUCT_PACK", kv_sql)

    return self.func("STRUCT_INSERT", this, kv_sql)
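For instance, inserting into an empty Snowflake object is expected to produce STRUCT_PACK (a sketch, approximate output):

    import sqlglot

    sql = "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 'v')"
    print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
    # Expected (approximately): SELECT STRUCT_PACK(k := 'v')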
def unnest_sql(self, expression: exp.Unnest) -> str:
    explode_array = expression.args.get("explode_array")
    if explode_array:
        # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
        # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
        expression.expressions.append(
            exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
        )

        # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
        alias = expression.args.get("alias")
        if alias:
            expression.set("alias", None)
            alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

        unnest_sql = super().unnest_sql(expression)
        select = exp.Select(expressions=[unnest_sql]).subquery(alias)
        return self.sql(select)

    return super().unnest_sql(expression)
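A hedged example (identifiers illustrative, output approximate), showing the column alias turning into a table alias on the wrapping subquery:

    import sqlglot

    sql = "SELECT * FROM UNNEST(arr) AS elem"
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
    # Expected (approximately):
    # SELECT * FROM (SELECT UNNEST(arr, max_depth => 2)) AS elem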
def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
    if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # DuckDB should render IGNORE NULLS only for the general-purpose
        # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
        return super().ignorenulls_sql(expression)

    self.unsupported("IGNORE NULLS is not supported for non-window functions.")
    return self.sql(expression, "this")
def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
    if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # DuckDB should render RESPECT NULLS only for the general-purpose
        # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
        return super().respectnulls_sql(expression)

    self.unsupported("RESPECT NULLS is not supported for non-window functions.")
    return self.sql(expression, "this")
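Both modifiers behave the same way; a sketch (identifiers illustrative, outputs approximate):

    import sqlglot

    # Window function: the modifier is preserved
    ok = "SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t"
    print(sqlglot.transpile(ok, read="bigquery", write="duckdb")[0])

    # Non-window use: the modifier is dropped and a warning is logged
    bad = "SELECT ARRAY_AGG(x IGNORE NULLS) FROM t"
    print(sqlglot.transpile(bad, read="bigquery", write="duckdb")[0])
    # Expected (approximately): SELECT ARRAY_AGG(x) FROM t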
def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
    this = self.sql(expression, "this")
    null_text = self.sql(expression, "null")

    if null_text:
        this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

    return self.func("ARRAY_TO_STRING", this, expression.expression)
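For example, BigQuery's three-argument ARRAY_TO_STRING carries a NULL replacement, which DuckDB expresses via LIST_TRANSFORM (a sketch, approximate output):

    import sqlglot

    sql = "SELECT ARRAY_TO_STRING(arr, ',', 'NULL')"
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
    # Expected (approximately):
    # SELECT ARRAY_TO_STRING(LIST_TRANSFORM(arr, x -> COALESCE(x, 'NULL')), ',')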
@unsupported_args("position", "occurrence")
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
    group = expression.args.get("group")
    params = expression.args.get("parameters")

    # Do not render group if there is no following argument,
    # and it's the default value for this dialect
    if (
        not params
        and group
        and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
    ):
        group = None
    return self.func(
        "REGEXP_EXTRACT", expression.this, expression.expression, group, params
    )
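A sketch (assuming group 0 is this dialect's REGEXP_EXTRACT_DEFAULT_GROUP; outputs approximate):

    import sqlglot

    # The default group is elided when it is the trailing argument
    print(sqlglot.transpile("SELECT REGEXP_EXTRACT(s, 'a(b)', 0)", read="duckdb", write="duckdb")[0])
    # Expected: SELECT REGEXP_EXTRACT(s, 'a(b)')

    # A non-default group is kept
    print(sqlglot.transpile("SELECT REGEXP_EXTRACT(s, 'a(b)', 1)", read="duckdb", write="duckdb")[0])
    # Expected: SELECT REGEXP_EXTRACT(s, 'a(b)', 1)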
@unsupported_args("culture")
def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
    fmt = expression.args.get("format")
    if fmt and fmt.is_int:
        return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

    self.unsupported("Only integer formats are supported by NumberToStr")
    return self.function_fallback_sql(expression)
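For instance, MySQL's FORMAT(number, decimals) parses to exp.NumberToStr, so (a sketch, approximate output):

    import sqlglot

    print(sqlglot.transpile("SELECT FORMAT(12332.1, 2)", read="mysql", write="duckdb")[0])
    # Expected (approximately): SELECT FORMAT('{:,.2f}', 12332.1)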
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql