sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    Version,
    approx_count_distinct_sql,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    count_if_to_sum,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    strposition_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]

WINDOW_FUNCS_WITH_IGNORE_NULLS = (
    exp.FirstValue,
    exp.LastValue,
    exp.Lag,
    exp.Lead,
    exp.NthValue,
)


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    expr = expression.expression
    interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit)

    return f"{self.sql(this)} {op} {self.sql(interval)}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the two dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)
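
# A minimal, hedged sketch (not part of the module) of how the date-delta helper
# above surfaces through sqlglot's public API; the exact output string is an
# assumption and may vary between sqlglot versions:
#
#   import sqlglot
#   sqlglot.transpile(
#       "SELECT DATE_ADD(CURRENT_DATE, INTERVAL 1 DAY)",
#       read="bigquery",
#       write="duckdb",
#   )
#   # expected (roughly): ["SELECT CURRENT_DATE + INTERVAL '1' DAY"]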


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )
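
# Hedged usage sketch (an assumption, not part of the module): both series
# builders produce exp.GenerateSeries, differing only in end-exclusivity, and
# the 0 default kicks in for the single-argument form:
#
#   import sqlglot
#   sqlglot.transpile("SELECT GENERATE_SERIES(5)", read="duckdb", write="duckdb")
#   # expected (roughly): ["SELECT GENERATE_SERIES(0, 5)"]
#
#   sqlglot.transpile("SELECT RANGE(1, 5)", read="duckdb", write="duckdb")
#   # RANGE should round-trip because is_end_exclusive is preserved (see
#   # Generator.generateseries_sql below): ["SELECT RANGE(1, 5)"]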


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    #  1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    #  2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
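
# Hedged sketch (an assumption): _implicit_datetime_cast means string literals
# are cast before DATE_DIFF is emitted, so a DuckDB round trip looks roughly like:
#
#   import sqlglot
#   sqlglot.transpile(
#       "SELECT DATE_DIFF('day', '2020-01-01', '2020-01-02')",
#       read="duckdb",
#       write="duckdb",
#   )
#   # expected (roughly):
#   # ["SELECT DATE_DIFF('day', CAST('2020-01-01' AS DATE), CAST('2020-01-02' AS DATE))"]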


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
    return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))
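
# Hedged sketch (an assumption): transpiling BigQuery's GENERATE_DATE_ARRAY
# through the helper above yields a GENERATE_SERIES call cast back to DATE[]:
#
#   import sqlglot
#   sqlglot.transpile(
#       "SELECT GENERATE_DATE_ARRAY('2020-01-01', '2020-01-05')",
#       read="bigquery",
#       write="duckdb",
#   )
#   # expected (roughly): a GENERATE_SERIES(CAST(... AS DATE), CAST(... AS DATE),
#   # INTERVAL '1' DAY) wrapped in CAST(... AS DATE[])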


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "DATETIME": TokenType.TIMESTAMPNTZ,
            "DETACH": TokenType.DETACH,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP": TokenType.TIMESTAMPNTZ,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "GENERATE_SERIES": _build_generate_series(),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "RANGE": _build_generate_series(end_exclusive=True),
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TIME_BUCKET": exp.DateBin.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            **dict.fromkeys(
                ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
            ),
        }
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ATTACH: lambda self: self._parse_attach_detach(),
            TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
        }
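
        # Hedged sketch (an assumption, not part of the module): the FUNCTIONS
        # table above drives parsing into canonical expressions, e.g.:
        #
        #   import sqlglot
        #   from sqlglot import exp
        #   node = sqlglot.parse_one("SELECT EPOCH_MS(1700000000000)", read="duckdb")
        #   isinstance(node.selects[0], exp.UnixToTime)  # expected: True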

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. foo: 1
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
                self._match(TokenType.COLON)
                comments = self._prev_comments or []

                this = self._parse_assignment()
                if isinstance(this, exp.Expression):
                    # Moves the comment next to the alias in `alias: expr /* comment */`
                    comments += this.pop_comments() or []

                return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            return super()._parse_expression()

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. FROM foo: bar
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                )
                self._match(TokenType.COLON)
                comments = self._prev_comments or []
            else:
                alias = None
                comments = []

            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
                # Moves the comment next to the alias in `alias: table /* comment */`
                comments += table.pop_comments() or []
                alias.comments = alias.pop_comments() + comments
                table.set("alias", alias)

            return table

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
                # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

        def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )
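
    # Hedged sketch (an assumption): the prefix-alias parsing above normalizes
    # DuckDB's `alias: expr` syntax into ordinary aliases, e.g.:
    #
    #   import sqlglot
    #   sqlglot.transpile("SELECT foo: 1", read="duckdb", write="duckdb")
    #   # expected (roughly): ["SELECT 1 AS foo"]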

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpILike: lambda self, e: self.func(
                "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
            ),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: strposition_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
            exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.DateBin: rename_func("TIME_BUCKET"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }
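
        # Hedged sketch (an assumption): together with _arrow_json_extract_sql
        # above, these supported path parts mean simple JSON paths render as
        # DuckDB's arrow syntax:
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT JSON_EXTRACT(x, '$.a')", read="duckdb", write="duckdb")
        #   # expected (roughly): ["SELECT x -> '$.a'"]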

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.JSONB: "JSON",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)
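
        # Hedged sketch (an assumption): the week/quarter fallback above rewrites
        # interval units DuckDB lacks into day multiples, e.g.:
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT x + INTERVAL 2 WEEK", read="duckdb", write="duckdb")
        #   # expected (roughly): ["SELECT x + (7 * INTERVAL '2' DAY)"]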

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")

            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def countif_sql(self, expression: exp.CountIf) -> str:
            if self.dialect.version >= Version("1.2"):
                return self.function_fallback_sql(expression)

            # https://github.com/tobymao/sqlglot/pull/4749
            return count_if_to_sum(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if self.dialect.version >= Version("1.2"):
                return super().bracket_sql(expression)

            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)
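
        # Hedged sketch (an assumption): for an argument of unknown type with the
        # binary flag set, length_sql above falls back to runtime dispatch, roughly:
        #
        #   CASE TYPEOF(x)
        #     WHEN 'VARCHAR' THEN LENGTH(CAST(x AS VARCHAR))
        #     WHEN 'BLOB' THEN OCTET_LENGTH(CAST(x AS BLOB))
        #   END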

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it, e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)

        def autoincrementcolumnconstraint_sql(self, _) -> str:
            self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
            return ""
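

# Hedged end-to-end sketch (an assumption, not part of the module):
# ignorenulls_sql above keeps IGNORE NULLS only for the window functions listed
# in WINDOW_FUNCS_WITH_IGNORE_NULLS and silently drops it elsewhere, e.g.:
#
#   import sqlglot
#   sqlglot.transpile(
#       "SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t",
#       read="duckdb",
#       write="duckdb",
#   )
#   # FIRST_VALUE should keep IGNORE NULLS; for functions outside that tuple
#   # the qualifier is expected to be dropped instead.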
return super().columndef_sql(expression, sep) 950 951 def join_sql(self, expression: exp.Join) -> str: 952 if ( 953 expression.side == "LEFT" 954 and not expression.args.get("on") 955 and isinstance(expression.this, exp.Unnest) 956 ): 957 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 958 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 959 return super().join_sql(expression.on(exp.true())) 960 961 return super().join_sql(expression) 962 963 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 964 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 965 if expression.args.get("is_end_exclusive"): 966 return rename_func("RANGE")(self, expression) 967 968 return self.function_fallback_sql(expression) 969 970 def countif_sql(self, expression: exp.CountIf) -> str: 971 if self.dialect.version >= Version("1.2"): 972 return self.function_fallback_sql(expression) 973 974 # https://github.com/tobymao/sqlglot/pull/4749 975 return count_if_to_sum(self, expression) 976 977 def bracket_sql(self, expression: exp.Bracket) -> str: 978 if self.dialect.version >= Version("1.2"): 979 return super().bracket_sql(expression) 980 981 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 982 this = expression.this 983 if isinstance(this, exp.Array): 984 this.replace(exp.paren(this)) 985 986 bracket = super().bracket_sql(expression) 987 988 if not expression.args.get("returns_list_for_maps"): 989 if not this.type: 990 from sqlglot.optimizer.annotate_types import annotate_types 991 992 this = annotate_types(this) 993 994 if this.is_type(exp.DataType.Type.MAP): 995 bracket = f"({bracket})[1]" 996 997 return bracket 998 999 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1000 expression_sql = self.sql(expression, "expression") 1001 1002 func = expression.this 1003 if isinstance(func, exp.PERCENTILES): 1004 # Make the order key the first arg and slide the fraction to the right 1005 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1006 order_col = expression.find(exp.Ordered) 1007 if order_col: 1008 func.set("expression", func.this) 1009 func.set("this", order_col.this) 1010 1011 this = self.sql(expression, "this").rstrip(")") 1012 1013 return f"{this}{expression_sql})" 1014 1015 def length_sql(self, expression: exp.Length) -> str: 1016 arg = expression.this 1017 1018 # Dialects like BQ and Snowflake also accept binary values as args, so 1019 # DDB will attempt to infer the type or resort to case/when resolution 1020 if not expression.args.get("binary") or arg.is_string: 1021 return self.func("LENGTH", arg) 1022 1023 if not arg.type: 1024 from sqlglot.optimizer.annotate_types import annotate_types 1025 1026 arg = annotate_types(arg) 1027 1028 if arg.is_type(*exp.DataType.TEXT_TYPES): 1029 return self.func("LENGTH", arg) 1030 1031 # We need these casts to make duckdb's static type checker happy 1032 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1033 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1034 1035 case = ( 1036 exp.case(self.func("TYPEOF", arg)) 1037 .when( 1038 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 1039 ) # anonymous to break length_sql recursion 1040 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1041 ) 1042 1043 return self.sql(case) 1044 1045 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1046 this = expression.this 1047 key = expression.args.get("key") 1048 key_sql = key.name if isinstance(key, exp.Expression) else 
"" 1049 value_sql = self.sql(expression, "value") 1050 1051 kv_sql = f"{key_sql} := {value_sql}" 1052 1053 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1054 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1055 if isinstance(this, exp.Struct) and not this.expressions: 1056 return self.func("STRUCT_PACK", kv_sql) 1057 1058 return self.func("STRUCT_INSERT", this, kv_sql) 1059 1060 def unnest_sql(self, expression: exp.Unnest) -> str: 1061 explode_array = expression.args.get("explode_array") 1062 if explode_array: 1063 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1064 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1065 expression.expressions.append( 1066 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1067 ) 1068 1069 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1070 alias = expression.args.get("alias") 1071 if alias: 1072 expression.set("alias", None) 1073 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1074 1075 unnest_sql = super().unnest_sql(expression) 1076 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1077 return self.sql(select) 1078 1079 return super().unnest_sql(expression) 1080 1081 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1082 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 1083 # DuckDB should render IGNORE NULLS only for the general-purpose 1084 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1085 return super().ignorenulls_sql(expression) 1086 1087 return self.sql(expression, "this") 1088 1089 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1090 this = self.sql(expression, "this") 1091 null_text = self.sql(expression, "null") 1092 1093 if null_text: 1094 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1095 1096 return self.func("ARRAY_TO_STRING", this, expression.expression) 1097 1098 @unsupported_args("position", "occurrence") 1099 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1100 group = expression.args.get("group") 1101 params = expression.args.get("parameters") 1102 1103 # Do not render group if there is no following argument, 1104 # and it's the default value for this dialect 1105 if ( 1106 not params 1107 and group 1108 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1109 ): 1110 group = None 1111 return self.func( 1112 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1113 ) 1114 1115 @unsupported_args("culture") 1116 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1117 fmt = expression.args.get("format") 1118 if fmt and fmt.is_int: 1119 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1120 1121 self.unsupported("Only integer formats are supported by NumberToStr") 1122 return self.function_fallback_sql(expression) 1123 1124 def autoincrementcolumnconstraint_sql(self, _) -> str: 1125 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1126 return ""
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts e.g. in DuckDB. In dialects which don't support fixed size arrays such as Snowflake, this should be interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether number literals can include underscores for better readability
Specifies the strategy according to which identifiers should be normalized.
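To make these settings concrete, here is a minimal sketch (not part of the module source) that exercises two of them through the public API; the expected outputs in the comments are illustrative, not guarantees:

import sqlglot

# Underscore-separated number literals parse under DuckDB; the separators are
# dropped when writing a dialect that lacks them (expected: SELECT 1000000).
print(sqlglot.transpile("SELECT 1_000_000", read="duckdb", write="sqlite")[0])

# ORDER BY ALL round-trips because the dialect declares support for it.
print(sqlglot.transpile("SELECT a, b FROM t ORDER BY ALL", read="duckdb", write="duckdb")[0])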
294 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 295 if isinstance(path, exp.Literal): 296 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 297 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 298 # This check ensures we'll avoid trying to parse these as JSON paths, which can 299 # either result in a noisy warning or in an invalid representation of the path. 300 path_text = path.name 301 if path_text.startswith("/") or "[#" in path_text: 302 return path 303 304 return super().to_json_path(path)
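A short sketch of the behavior described in the comment above (illustrative, not part of the source): a JSONPath-style literal is parsed into structured path parts, while a JSON pointer path with a leading "/" is passed through untouched.

import sqlglot

# Parsed as a JSONPath and regenerated from its structured form.
print(sqlglot.transpile("SELECT j -> '$.a.b' FROM t", read="duckdb", write="duckdb")[0])

# Left as-is: the leading "/" marks DuckDB's JSON pointer syntax.
print(sqlglot.transpile("SELECT j -> '/a/b' FROM t", read="duckdb", write="duckdb")[0])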
306 class Tokenizer(tokens.Tokenizer): 307 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 308 HEREDOC_STRINGS = ["$"] 309 310 HEREDOC_TAG_IS_IDENTIFIER = True 311 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 312 313 KEYWORDS = { 314 **tokens.Tokenizer.KEYWORDS, 315 "//": TokenType.DIV, 316 "**": TokenType.DSTAR, 317 "^@": TokenType.CARET_AT, 318 "@>": TokenType.AT_GT, 319 "<@": TokenType.LT_AT, 320 "ATTACH": TokenType.ATTACH, 321 "BINARY": TokenType.VARBINARY, 322 "BITSTRING": TokenType.BIT, 323 "BPCHAR": TokenType.TEXT, 324 "CHAR": TokenType.TEXT, 325 "CHARACTER VARYING": TokenType.TEXT, 326 "DATETIME": TokenType.TIMESTAMPNTZ, 327 "DETACH": TokenType.DETACH, 328 "EXCLUDE": TokenType.EXCEPT, 329 "LOGICAL": TokenType.BOOLEAN, 330 "ONLY": TokenType.ONLY, 331 "PIVOT_WIDER": TokenType.PIVOT, 332 "POSITIONAL": TokenType.POSITIONAL, 333 "SIGNED": TokenType.INT, 334 "STRING": TokenType.TEXT, 335 "SUMMARIZE": TokenType.SUMMARIZE, 336 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 337 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 338 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 339 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 340 "TIMESTAMP_US": TokenType.TIMESTAMP, 341 "UBIGINT": TokenType.UBIGINT, 342 "UINTEGER": TokenType.UINT, 343 "USMALLINT": TokenType.USMALLINT, 344 "UTINYINT": TokenType.UTINYINT, 345 "VARCHAR": TokenType.TEXT, 346 } 347 KEYWORDS.pop("/*+") 348 349 SINGLE_TOKENS = { 350 **tokens.Tokenizer.SINGLE_TOKENS, 351 "$": TokenType.PARAMETER, 352 }
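The keyword table above normalizes several DuckDB type aliases during tokenization; a small illustrative sketch (expected output shown as a comment, assuming the stock dialect):

import sqlglot

# "STRING" tokenizes to TEXT and "//" is integer division, so the round trip
# is expected to print: SELECT CAST(c AS TEXT), 7 // 2
print(sqlglot.transpile("SELECT CAST(c AS STRING), 7 // 2", read="duckdb", write="duckdb")[0])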
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
354 class Parser(parser.Parser): 355 BITWISE = { 356 **parser.Parser.BITWISE, 357 TokenType.TILDA: exp.RegexpLike, 358 } 359 BITWISE.pop(TokenType.CARET) 360 361 RANGE_PARSERS = { 362 **parser.Parser.RANGE_PARSERS, 363 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 364 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 365 } 366 367 EXPONENT = { 368 **parser.Parser.EXPONENT, 369 TokenType.CARET: exp.Pow, 370 TokenType.DSTAR: exp.Pow, 371 } 372 373 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 374 375 FUNCTIONS = { 376 **parser.Parser.FUNCTIONS, 377 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 378 "ARRAY_SORT": exp.SortArray.from_arg_list, 379 "DATEDIFF": _build_date_diff, 380 "DATE_DIFF": _build_date_diff, 381 "DATE_TRUNC": date_trunc_to_time, 382 "DATETRUNC": date_trunc_to_time, 383 "DECODE": lambda args: exp.Decode( 384 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 385 ), 386 "EDITDIST3": exp.Levenshtein.from_arg_list, 387 "ENCODE": lambda args: exp.Encode( 388 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 389 ), 390 "EPOCH": exp.TimeToUnix.from_arg_list, 391 "EPOCH_MS": lambda args: exp.UnixToTime( 392 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 393 ), 394 "GENERATE_SERIES": _build_generate_series(), 395 "JSON": exp.ParseJSON.from_arg_list, 396 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 397 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 398 "LIST_HAS": exp.ArrayContains.from_arg_list, 399 "LIST_REVERSE_SORT": _build_sort_array_desc, 400 "LIST_SORT": exp.SortArray.from_arg_list, 401 "LIST_VALUE": lambda args: exp.Array(expressions=args), 402 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 403 "MAKE_TIMESTAMP": _build_make_timestamp, 404 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 405 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 406 "RANGE": _build_generate_series(end_exclusive=True), 407 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 408 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 409 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 410 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 411 this=seq_get(args, 0), 412 expression=seq_get(args, 1), 413 replacement=seq_get(args, 2), 414 modifiers=seq_get(args, 3), 415 ), 416 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 417 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 418 "STRING_SPLIT": exp.Split.from_arg_list, 419 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 420 "STRING_TO_ARRAY": exp.Split.from_arg_list, 421 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 422 "STRUCT_PACK": exp.Struct.from_arg_list, 423 "STR_SPLIT": exp.Split.from_arg_list, 424 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 425 "TIME_BUCKET": exp.DateBin.from_arg_list, 426 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 427 "UNNEST": exp.Explode.from_arg_list, 428 "XOR": binary_from_function(exp.BitwiseXor), 429 } 430 431 FUNCTIONS.pop("DATE_SUB") 432 FUNCTIONS.pop("GLOB") 433 434 FUNCTION_PARSERS = { 435 **parser.Parser.FUNCTION_PARSERS, 436 **dict.fromkeys( 437 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 438 ), 439 } 440 FUNCTION_PARSERS.pop("DECODE") 441 442 NO_PAREN_FUNCTION_PARSERS = { 443 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 444 "MAP": lambda self: self._parse_map(), 445 } 446 447 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 448 
TokenType.SEMI, 449 TokenType.ANTI, 450 } 451 452 PLACEHOLDER_PARSERS = { 453 **parser.Parser.PLACEHOLDER_PARSERS, 454 TokenType.PARAMETER: lambda self: ( 455 self.expression(exp.Placeholder, this=self._prev.text) 456 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 457 else None 458 ), 459 } 460 461 TYPE_CONVERTERS = { 462 # https://duckdb.org/docs/sql/data_types/numeric 463 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 464 # https://duckdb.org/docs/sql/data_types/text 465 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 466 } 467 468 STATEMENT_PARSERS = { 469 **parser.Parser.STATEMENT_PARSERS, 470 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 471 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 472 } 473 474 def _parse_expression(self) -> t.Optional[exp.Expression]: 475 # DuckDB supports prefix aliases, e.g. foo: 1 476 if self._next and self._next.token_type == TokenType.COLON: 477 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 478 self._match(TokenType.COLON) 479 comments = self._prev_comments or [] 480 481 this = self._parse_assignment() 482 if isinstance(this, exp.Expression): 483 # Moves the comment next to the alias in `alias: expr /* comment */` 484 comments += this.pop_comments() or [] 485 486 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 487 488 return super()._parse_expression() 489 490 def _parse_table( 491 self, 492 schema: bool = False, 493 joins: bool = False, 494 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 495 parse_bracket: bool = False, 496 is_db_reference: bool = False, 497 parse_partition: bool = False, 498 ) -> t.Optional[exp.Expression]: 499 # DuckDB supports prefix aliases, e.g. 
FROM foo: bar 500 if self._next and self._next.token_type == TokenType.COLON: 501 alias = self._parse_table_alias( 502 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 503 ) 504 self._match(TokenType.COLON) 505 comments = self._prev_comments or [] 506 else: 507 alias = None 508 comments = [] 509 510 table = super()._parse_table( 511 schema=schema, 512 joins=joins, 513 alias_tokens=alias_tokens, 514 parse_bracket=parse_bracket, 515 is_db_reference=is_db_reference, 516 parse_partition=parse_partition, 517 ) 518 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 519 # Moves the comment next to the alias in `alias: table /* comment */` 520 comments += table.pop_comments() or [] 521 alias.comments = alias.pop_comments() + comments 522 table.set("alias", alias) 523 524 return table 525 526 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 527 # https://duckdb.org/docs/sql/samples.html 528 sample = super()._parse_table_sample(as_modifier=as_modifier) 529 if sample and not sample.args.get("method"): 530 if sample.args.get("size"): 531 sample.set("method", exp.var("RESERVOIR")) 532 else: 533 sample.set("method", exp.var("SYSTEM")) 534 535 return sample 536 537 def _parse_bracket( 538 self, this: t.Optional[exp.Expression] = None 539 ) -> t.Optional[exp.Expression]: 540 bracket = super()._parse_bracket(this) 541 542 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 543 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 544 bracket.set("returns_list_for_maps", True) 545 546 return bracket 547 548 def _parse_map(self) -> exp.ToMap | exp.Map: 549 if self._match(TokenType.L_BRACE, advance=False): 550 return self.expression(exp.ToMap, this=self._parse_bracket()) 551 552 args = self._parse_wrapped_csv(self._parse_assignment) 553 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 554 555 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 556 return self._parse_field_def() 557 558 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 559 if len(aggregations) == 1: 560 return super()._pivot_column_names(aggregations) 561 return pivot_column_names(aggregations, dialect="duckdb") 562 563 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 564 def _parse_attach_option() -> exp.AttachOption: 565 return self.expression( 566 exp.AttachOption, 567 this=self._parse_var(any_token=True), 568 expression=self._parse_field(any_token=True), 569 ) 570 571 self._match(TokenType.DATABASE) 572 exists = self._parse_exists(not_=is_attach) 573 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 574 575 if self._match(TokenType.L_PAREN, advance=False): 576 expressions = self._parse_wrapped_csv(_parse_attach_option) 577 else: 578 expressions = None 579 580 return ( 581 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 582 if is_attach 583 else self.expression(exp.Detach, this=this, exists=exists) 584 )
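The prefix-alias support implemented in _parse_expression and _parse_table above produces ordinary Alias/TableAlias nodes, so other dialects can render them with AS. A minimal sketch (the Postgres output shown is an expectation, not a verbatim guarantee):

import sqlglot

# Expected to print roughly: SELECT 1 + 2 AS total FROM t AS tbl
print(sqlglot.transpile("SELECT total: 1 + 2 FROM tbl: t", read="duckdb", write="postgres")[0])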
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
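A hedged sketch of these arguments in practice: with error_level=RAISE, parse errors are collected and raised as a single ParseError whose .errors list carries the structured messages (the invalid query below is illustrative):

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # "SELECT 1 +" is missing the right-hand operand, so validation fails.
    sqlglot.parse_one("SELECT 1 +", read="duckdb", error_level=ErrorLevel.RAISE, max_errors=1)
except ParseError as err:
    print(err.errors[0]["description"])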
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
586 class Generator(generator.Generator): 587 PARAMETER_TOKEN = "$" 588 NAMED_PLACEHOLDER_TOKEN = "$" 589 JOIN_HINTS = False 590 TABLE_HINTS = False 591 QUERY_HINTS = False 592 LIMIT_FETCH = "LIMIT" 593 STRUCT_DELIMITER = ("(", ")") 594 RENAME_TABLE_WITH_DB = False 595 NVL2_SUPPORTED = False 596 SEMI_ANTI_JOIN_WITH_SIDE = False 597 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 598 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 599 LAST_DAY_SUPPORTS_DATE_PART = False 600 JSON_KEY_VALUE_PAIR_SEP = "," 601 IGNORE_NULLS_IN_FUNC = True 602 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 603 SUPPORTS_CREATE_TABLE_LIKE = False 604 MULTI_ARG_DISTINCT = False 605 CAN_IMPLEMENT_ARRAY_ANY = True 606 SUPPORTS_TO_NUMBER = False 607 COPY_HAS_INTO_KEYWORD = False 608 STAR_EXCEPT = "EXCLUDE" 609 PAD_FILL_PATTERN_IS_REQUIRED = True 610 ARRAY_CONCAT_IS_VAR_LEN = False 611 ARRAY_SIZE_DIM_REQUIRED = False 612 613 TRANSFORMS = { 614 **generator.Generator.TRANSFORMS, 615 exp.ApproxDistinct: approx_count_distinct_sql, 616 exp.Array: inline_array_unless_query, 617 exp.ArrayFilter: rename_func("LIST_FILTER"), 618 exp.ArraySort: _array_sort_sql, 619 exp.ArraySum: rename_func("LIST_SUM"), 620 exp.BitwiseXor: rename_func("XOR"), 621 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 622 exp.CurrentDate: lambda *_: "CURRENT_DATE", 623 exp.CurrentTime: lambda *_: "CURRENT_TIME", 624 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 625 exp.DayOfMonth: rename_func("DAYOFMONTH"), 626 exp.DayOfWeek: rename_func("DAYOFWEEK"), 627 exp.DayOfWeekIso: rename_func("ISODOW"), 628 exp.DayOfYear: rename_func("DAYOFYEAR"), 629 exp.DataType: _datatype_sql, 630 exp.Date: _date_sql, 631 exp.DateAdd: _date_delta_sql, 632 exp.DateFromParts: rename_func("MAKE_DATE"), 633 exp.DateSub: _date_delta_sql, 634 exp.DateDiff: _date_diff_sql, 635 exp.DateStrToDate: datestrtodate_sql, 636 exp.Datetime: no_datetime_sql, 637 exp.DatetimeSub: _date_delta_sql, 638 exp.DatetimeAdd: _date_delta_sql, 639 exp.DateToDi: lambda self, 640 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 641 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 642 exp.DiToDate: lambda self, 643 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 644 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 645 exp.GenerateDateArray: _generate_datetime_array_sql, 646 exp.GenerateTimestampArray: _generate_datetime_array_sql, 647 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 648 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 649 exp.Explode: rename_func("UNNEST"), 650 exp.IntDiv: lambda self, e: self.binary(e, "//"), 651 exp.IsInf: rename_func("ISINF"), 652 exp.IsNan: rename_func("ISNAN"), 653 exp.JSONBExists: rename_func("JSON_EXISTS"), 654 exp.JSONExtract: _arrow_json_extract_sql, 655 exp.JSONExtractArray: _json_extract_value_array_sql, 656 exp.JSONExtractScalar: _arrow_json_extract_sql, 657 exp.JSONFormat: _json_format_sql, 658 exp.JSONValueArray: _json_extract_value_array_sql, 659 exp.Lateral: explode_to_unnest_sql, 660 exp.LogicalOr: rename_func("BOOL_OR"), 661 exp.LogicalAnd: rename_func("BOOL_AND"), 662 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 663 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 664 exp.MonthsBetween: lambda self, e: self.func( 665 "DATEDIFF", 666 "'month'", 667 exp.cast(e.expression, 
exp.DataType.Type.TIMESTAMP, copy=True), 668 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 669 ), 670 exp.PercentileCont: rename_func("QUANTILE_CONT"), 671 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 672 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 673 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 674 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 675 exp.RegexpReplace: lambda self, e: self.func( 676 "REGEXP_REPLACE", 677 e.this, 678 e.expression, 679 e.args.get("replacement"), 680 e.args.get("modifiers"), 681 ), 682 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 683 exp.RegexpILike: lambda self, e: self.func( 684 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 685 ), 686 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 687 exp.Return: lambda self, e: self.sql(e, "this"), 688 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 689 exp.Rand: rename_func("RANDOM"), 690 exp.SHA: rename_func("SHA1"), 691 exp.SHA2: sha256_sql, 692 exp.Split: rename_func("STR_SPLIT"), 693 exp.SortArray: _sort_array_sql, 694 exp.StrPosition: strposition_sql, 695 exp.StrToUnix: lambda self, e: self.func( 696 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 697 ), 698 exp.Struct: _struct_sql, 699 exp.Transform: rename_func("LIST_TRANSFORM"), 700 exp.TimeAdd: _date_delta_sql, 701 exp.Time: no_time_sql, 702 exp.TimeDiff: _timediff_sql, 703 exp.Timestamp: no_timestamp_sql, 704 exp.TimestampDiff: lambda self, e: self.func( 705 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 706 ), 707 exp.TimestampTrunc: timestamptrunc_sql(), 708 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 709 exp.TimeStrToTime: timestrtotime_sql, 710 exp.TimeStrToUnix: lambda self, e: self.func( 711 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 712 ), 713 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 714 exp.TimeToUnix: rename_func("EPOCH"), 715 exp.TsOrDiToDi: lambda self, 716 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 717 exp.TsOrDsAdd: _date_delta_sql, 718 exp.TsOrDsDiff: lambda self, e: self.func( 719 "DATE_DIFF", 720 f"'{e.args.get('unit') or 'DAY'}'", 721 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 722 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 723 ), 724 exp.UnixToStr: lambda self, e: self.func( 725 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 726 ), 727 exp.DatetimeTrunc: lambda self, e: self.func( 728 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 729 ), 730 exp.UnixToTime: _unix_to_time_sql, 731 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 732 exp.VariancePop: rename_func("VAR_POP"), 733 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 734 exp.Xor: bool_xor_sql, 735 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 736 rename_func("LEVENSHTEIN") 737 ), 738 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 739 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 740 exp.DateBin: rename_func("TIME_BUCKET"), 741 } 742 743 SUPPORTED_JSON_PATH_PARTS = { 744 exp.JSONPathKey, 745 exp.JSONPathRoot, 746 exp.JSONPathSubscript, 747 exp.JSONPathWildcard, 748 } 749 750 TYPE_MAPPING = { 751 **generator.Generator.TYPE_MAPPING, 752 exp.DataType.Type.BINARY: "BLOB", 
753 exp.DataType.Type.BPCHAR: "TEXT", 754 exp.DataType.Type.CHAR: "TEXT", 755 exp.DataType.Type.DATETIME: "TIMESTAMP", 756 exp.DataType.Type.FLOAT: "REAL", 757 exp.DataType.Type.JSONB: "JSON", 758 exp.DataType.Type.NCHAR: "TEXT", 759 exp.DataType.Type.NVARCHAR: "TEXT", 760 exp.DataType.Type.UINT: "UINTEGER", 761 exp.DataType.Type.VARBINARY: "BLOB", 762 exp.DataType.Type.ROWVERSION: "BLOB", 763 exp.DataType.Type.VARCHAR: "TEXT", 764 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 765 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 766 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 767 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 768 } 769 770 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 771 RESERVED_KEYWORDS = { 772 "array", 773 "analyse", 774 "union", 775 "all", 776 "when", 777 "in_p", 778 "default", 779 "create_p", 780 "window", 781 "asymmetric", 782 "to", 783 "else", 784 "localtime", 785 "from", 786 "end_p", 787 "select", 788 "current_date", 789 "foreign", 790 "with", 791 "grant", 792 "session_user", 793 "or", 794 "except", 795 "references", 796 "fetch", 797 "limit", 798 "group_p", 799 "leading", 800 "into", 801 "collate", 802 "offset", 803 "do", 804 "then", 805 "localtimestamp", 806 "check_p", 807 "lateral_p", 808 "current_role", 809 "where", 810 "asc_p", 811 "placing", 812 "desc_p", 813 "user", 814 "unique", 815 "initially", 816 "column", 817 "both", 818 "some", 819 "as", 820 "any", 821 "only", 822 "deferrable", 823 "null_p", 824 "current_time", 825 "true_p", 826 "table", 827 "case", 828 "trailing", 829 "variadic", 830 "for", 831 "on", 832 "distinct", 833 "false_p", 834 "not", 835 "constraint", 836 "current_timestamp", 837 "returning", 838 "primary", 839 "intersect", 840 "having", 841 "analyze", 842 "current_user", 843 "and", 844 "cast", 845 "symmetric", 846 "using", 847 "order", 848 "current_catalog", 849 } 850 851 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 852 853 # DuckDB doesn't generally support CREATE TABLE .. properties 854 # https://duckdb.org/docs/sql/statements/create_table.html 855 PROPERTIES_LOCATION = { 856 prop: exp.Properties.Location.UNSUPPORTED 857 for prop in generator.Generator.PROPERTIES_LOCATION 858 } 859 860 # There are a few exceptions (e.g. 
temporary tables) which are supported or 861 # can be transpiled to DuckDB, so we explicitly override them accordingly 862 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 863 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 864 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 865 866 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 867 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 868 869 def strtotime_sql(self, expression: exp.StrToTime) -> str: 870 if expression.args.get("safe"): 871 formatted_time = self.format_time(expression) 872 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 873 return str_to_time_sql(self, expression) 874 875 def strtodate_sql(self, expression: exp.StrToDate) -> str: 876 if expression.args.get("safe"): 877 formatted_time = self.format_time(expression) 878 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 879 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 880 881 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 882 arg = expression.this 883 if expression.args.get("safe"): 884 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 885 return self.func("JSON", arg) 886 887 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 888 nano = expression.args.get("nano") 889 if nano is not None: 890 expression.set( 891 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 892 ) 893 894 return rename_func("MAKE_TIME")(self, expression) 895 896 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 897 sec = expression.args["sec"] 898 899 milli = expression.args.get("milli") 900 if milli is not None: 901 sec += milli.pop() / exp.Literal.number(1000.0) 902 903 nano = expression.args.get("nano") 904 if nano is not None: 905 sec += nano.pop() / exp.Literal.number(1000000000.0) 906 907 if milli or nano: 908 expression.set("sec", sec) 909 910 return rename_func("MAKE_TIMESTAMP")(self, expression) 911 912 def tablesample_sql( 913 self, 914 expression: exp.TableSample, 915 tablesample_keyword: t.Optional[str] = None, 916 ) -> str: 917 if not isinstance(expression.parent, exp.Select): 918 # This sample clause only applies to a single source, not the entire resulting relation 919 tablesample_keyword = "TABLESAMPLE" 920 921 if expression.args.get("size"): 922 method = expression.args.get("method") 923 if method and method.name.upper() != "RESERVOIR": 924 self.unsupported( 925 f"Sampling method {method} is not supported with a discrete sample count, " 926 "defaulting to reservoir sampling" 927 ) 928 expression.set("method", exp.var("RESERVOIR")) 929 930 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 931 932 def interval_sql(self, expression: exp.Interval) -> str: 933 multiplier: t.Optional[int] = None 934 unit = expression.text("unit").lower() 935 936 if unit.startswith("week"): 937 multiplier = 7 938 if unit.startswith("quarter"): 939 multiplier = 90 940 941 if multiplier: 942 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 943 944 return super().interval_sql(expression) 945 946 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 947 if isinstance(expression.parent, exp.UserDefinedFunction): 948 return self.sql(expression, "this") 949 
return super().columndef_sql(expression, sep) 950 951 def join_sql(self, expression: exp.Join) -> str: 952 if ( 953 expression.side == "LEFT" 954 and not expression.args.get("on") 955 and isinstance(expression.this, exp.Unnest) 956 ): 957 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 958 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 959 return super().join_sql(expression.on(exp.true())) 960 961 return super().join_sql(expression) 962 963 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 964 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 965 if expression.args.get("is_end_exclusive"): 966 return rename_func("RANGE")(self, expression) 967 968 return self.function_fallback_sql(expression) 969 970 def countif_sql(self, expression: exp.CountIf) -> str: 971 if self.dialect.version >= Version("1.2"): 972 return self.function_fallback_sql(expression) 973 974 # https://github.com/tobymao/sqlglot/pull/4749 975 return count_if_to_sum(self, expression) 976 977 def bracket_sql(self, expression: exp.Bracket) -> str: 978 if self.dialect.version >= Version("1.2"): 979 return super().bracket_sql(expression) 980 981 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 982 this = expression.this 983 if isinstance(this, exp.Array): 984 this.replace(exp.paren(this)) 985 986 bracket = super().bracket_sql(expression) 987 988 if not expression.args.get("returns_list_for_maps"): 989 if not this.type: 990 from sqlglot.optimizer.annotate_types import annotate_types 991 992 this = annotate_types(this) 993 994 if this.is_type(exp.DataType.Type.MAP): 995 bracket = f"({bracket})[1]" 996 997 return bracket 998 999 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1000 expression_sql = self.sql(expression, "expression") 1001 1002 func = expression.this 1003 if isinstance(func, exp.PERCENTILES): 1004 # Make the order key the first arg and slide the fraction to the right 1005 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1006 order_col = expression.find(exp.Ordered) 1007 if order_col: 1008 func.set("expression", func.this) 1009 func.set("this", order_col.this) 1010 1011 this = self.sql(expression, "this").rstrip(")") 1012 1013 return f"{this}{expression_sql})" 1014 1015 def length_sql(self, expression: exp.Length) -> str: 1016 arg = expression.this 1017 1018 # Dialects like BQ and Snowflake also accept binary values as args, so 1019 # DDB will attempt to infer the type or resort to case/when resolution 1020 if not expression.args.get("binary") or arg.is_string: 1021 return self.func("LENGTH", arg) 1022 1023 if not arg.type: 1024 from sqlglot.optimizer.annotate_types import annotate_types 1025 1026 arg = annotate_types(arg) 1027 1028 if arg.is_type(*exp.DataType.TEXT_TYPES): 1029 return self.func("LENGTH", arg) 1030 1031 # We need these casts to make duckdb's static type checker happy 1032 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1033 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1034 1035 case = ( 1036 exp.case(self.func("TYPEOF", arg)) 1037 .when( 1038 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 1039 ) # anonymous to break length_sql recursion 1040 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1041 ) 1042 1043 return self.sql(case) 1044 1045 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1046 this = expression.this 1047 key = expression.args.get("key") 1048 key_sql = key.name if isinstance(key, exp.Expression) else 
"" 1049 value_sql = self.sql(expression, "value") 1050 1051 kv_sql = f"{key_sql} := {value_sql}" 1052 1053 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1054 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1055 if isinstance(this, exp.Struct) and not this.expressions: 1056 return self.func("STRUCT_PACK", kv_sql) 1057 1058 return self.func("STRUCT_INSERT", this, kv_sql) 1059 1060 def unnest_sql(self, expression: exp.Unnest) -> str: 1061 explode_array = expression.args.get("explode_array") 1062 if explode_array: 1063 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1064 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1065 expression.expressions.append( 1066 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1067 ) 1068 1069 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1070 alias = expression.args.get("alias") 1071 if alias: 1072 expression.set("alias", None) 1073 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1074 1075 unnest_sql = super().unnest_sql(expression) 1076 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1077 return self.sql(select) 1078 1079 return super().unnest_sql(expression) 1080 1081 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1082 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 1083 # DuckDB should render IGNORE NULLS only for the general-purpose 1084 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1085 return super().ignorenulls_sql(expression) 1086 1087 return self.sql(expression, "this") 1088 1089 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1090 this = self.sql(expression, "this") 1091 null_text = self.sql(expression, "null") 1092 1093 if null_text: 1094 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1095 1096 return self.func("ARRAY_TO_STRING", this, expression.expression) 1097 1098 @unsupported_args("position", "occurrence") 1099 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1100 group = expression.args.get("group") 1101 params = expression.args.get("parameters") 1102 1103 # Do not render group if there is no following argument, 1104 # and it's the default value for this dialect 1105 if ( 1106 not params 1107 and group 1108 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1109 ): 1110 group = None 1111 return self.func( 1112 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1113 ) 1114 1115 @unsupported_args("culture") 1116 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1117 fmt = expression.args.get("format") 1118 if fmt and fmt.is_int: 1119 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1120 1121 self.unsupported("Only integer formats are supported by NumberToStr") 1122 return self.function_fallback_sql(expression) 1123 1124 def autoincrementcolumnconstraint_sql(self, _) -> str: 1125 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1126 return ""
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
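A quick sketch of the pretty and identify settings (both are plain keyword arguments forwarded to the generator):

import sqlglot

# Formats the query across multiple lines and quotes every identifier.
print(
    sqlglot.transpile(
        "select a from t where a > 1",
        read="duckdb",
        write="duckdb",
        pretty=True,
        identify=True,
    )[0]
)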
875 def strtodate_sql(self, expression: exp.StrToDate) -> str: 876 if expression.args.get("safe"): 877 formatted_time = self.format_time(expression) 878 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 879 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
887 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 888 nano = expression.args.get("nano") 889 if nano is not None: 890 expression.set( 891 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 892 ) 893 894 return rename_func("MAKE_TIME")(self, expression)
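An illustrative sketch, assuming Snowflake's TIME_FROM_PARTS parses into exp.TimeFromParts: the nanosecond argument is folded into the seconds before the rename to MAKE_TIME.

import sqlglot

# Expected to print roughly:
# SELECT MAKE_TIME(12, 30, 45 + 500000000 / 1000000000.0)
print(sqlglot.transpile("SELECT TIME_FROM_PARTS(12, 30, 45, 500000000)", read="snowflake", write="duckdb")[0])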
896 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 897 sec = expression.args["sec"] 898 899 milli = expression.args.get("milli") 900 if milli is not None: 901 sec += milli.pop() / exp.Literal.number(1000.0) 902 903 nano = expression.args.get("nano") 904 if nano is not None: 905 sec += nano.pop() / exp.Literal.number(1000000000.0) 906 907 if milli or nano: 908 expression.set("sec", sec) 909 910 return rename_func("MAKE_TIMESTAMP")(self, expression)
912 def tablesample_sql( 913 self, 914 expression: exp.TableSample, 915 tablesample_keyword: t.Optional[str] = None, 916 ) -> str: 917 if not isinstance(expression.parent, exp.Select): 918 # This sample clause only applies to a single source, not the entire resulting relation 919 tablesample_keyword = "TABLESAMPLE" 920 921 if expression.args.get("size"): 922 method = expression.args.get("method") 923 if method and method.name.upper() != "RESERVOIR": 924 self.unsupported( 925 f"Sampling method {method} is not supported with a discrete sample count, " 926 "defaulting to reservoir sampling" 927 ) 928 expression.set("method", exp.var("RESERVOIR")) 929 930 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
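A hedged sketch of the sampling rules here and in _parse_table_sample above: a discrete row count is forced to RESERVOIR, while a percentage keeps (or defaults to) SYSTEM. The sample syntax used below is one of several forms DuckDB accepts:

import sqlglot

print(sqlglot.transpile("SELECT * FROM t USING SAMPLE 50 ROWS", read="duckdb", write="duckdb")[0])
print(sqlglot.transpile("SELECT * FROM t USING SAMPLE 10 PERCENT", read="duckdb", write="duckdb")[0])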
932 def interval_sql(self, expression: exp.Interval) -> str: 933 multiplier: t.Optional[int] = None 934 unit = expression.text("unit").lower() 935 936 if unit.startswith("week"): 937 multiplier = 7 938 if unit.startswith("quarter"): 939 multiplier = 90 940 941 if multiplier: 942 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 943 944 return super().interval_sql(expression)
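An illustrative sketch of the interval lowering: WEEK and QUARTER units become day arithmetic with multipliers 7 and 90.

import sqlglot

# Expected to print roughly: SELECT CURRENT_DATE + (7 * INTERVAL 2 DAY)
print(sqlglot.transpile("SELECT CURRENT_DATE + INTERVAL 2 WEEK", read="duckdb", write="duckdb")[0])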
951 def join_sql(self, expression: exp.Join) -> str: 952 if ( 953 expression.side == "LEFT" 954 and not expression.args.get("on") 955 and isinstance(expression.this, exp.Unnest) 956 ): 957 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 958 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 959 return super().join_sql(expression.on(exp.true())) 960 961 return super().join_sql(expression)
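A hedged sketch of the join rule (the BigQuery read dialect is an illustrative choice; the UNNEST itself may be further rewritten by unnest_sql below, so the added ON TRUE is the point here):

import sqlglot

# The ON-less LEFT JOIN UNNEST gains an explicit, always-true ON clause.
print(sqlglot.transpile("SELECT * FROM t LEFT JOIN UNNEST(t.arr) AS v", read="bigquery", write="duckdb")[0])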
977 def bracket_sql(self, expression: exp.Bracket) -> str: 978 if self.dialect.version >= Version("1.2"): 979 return super().bracket_sql(expression) 980 981 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 982 this = expression.this 983 if isinstance(this, exp.Array): 984 this.replace(exp.paren(this)) 985 986 bracket = super().bracket_sql(expression) 987 988 if not expression.args.get("returns_list_for_maps"): 989 if not this.type: 990 from sqlglot.optimizer.annotate_types import annotate_types 991 992 this = annotate_types(this) 993 994 if this.is_type(exp.DataType.Type.MAP): 995 bracket = f"({bracket})[1]" 996 997 return bracket
999 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1000 expression_sql = self.sql(expression, "expression") 1001 1002 func = expression.this 1003 if isinstance(func, exp.PERCENTILES): 1004 # Make the order key the first arg and slide the fraction to the right 1005 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1006 order_col = expression.find(exp.Ordered) 1007 if order_col: 1008 func.set("expression", func.this) 1009 func.set("this", order_col.this) 1010 1011 this = self.sql(expression, "this").rstrip(")") 1012 1013 return f"{this}{expression_sql})"
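A sketch of the ordered-set rewrite: the ORDER BY key becomes the first argument and the fraction slides to the right.

import sqlglot

# Expected to print: SELECT QUANTILE_CONT(x, 0.5) FROM t
print(sqlglot.transpile("SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) FROM t", read="postgres", write="duckdb")[0])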
1015 def length_sql(self, expression: exp.Length) -> str: 1016 arg = expression.this 1017 1018 # Dialects like BQ and Snowflake also accept binary values as args, so 1019 # DDB will attempt to infer the type or resort to case/when resolution 1020 if not expression.args.get("binary") or arg.is_string: 1021 return self.func("LENGTH", arg) 1022 1023 if not arg.type: 1024 from sqlglot.optimizer.annotate_types import annotate_types 1025 1026 arg = annotate_types(arg) 1027 1028 if arg.is_type(*exp.DataType.TEXT_TYPES): 1029 return self.func("LENGTH", arg) 1030 1031 # We need these casts to make duckdb's static type checker happy 1032 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1033 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1034 1035 case = ( 1036 exp.case(self.func("TYPEOF", arg)) 1037 .when( 1038 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 1039 ) # anonymous to break length_sql recursion 1040 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1041 ) 1042 1043 return self.sql(case)
1045 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1046 this = expression.this 1047 key = expression.args.get("key") 1048 key_sql = key.name if isinstance(key, exp.Expression) else "" 1049 value_sql = self.sql(expression, "value") 1050 1051 kv_sql = f"{key_sql} := {value_sql}" 1052 1053 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1054 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1055 if isinstance(this, exp.Struct) and not this.expressions: 1056 return self.func("STRUCT_PACK", kv_sql) 1057 1058 return self.func("STRUCT_INSERT", this, kv_sql)
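A sketch mirroring the comment in the method: inserting into an empty object builds the struct outright with STRUCT_PACK, since STRUCT_INSERT({}, ...) is invalid DuckDB.

import sqlglot

# Expected to print: SELECT STRUCT_PACK(k := 1)
print(sqlglot.transpile("SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1)", read="snowflake", write="duckdb")[0])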
1060 def unnest_sql(self, expression: exp.Unnest) -> str: 1061 explode_array = expression.args.get("explode_array") 1062 if explode_array: 1063 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1064 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1065 expression.expressions.append( 1066 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1067 ) 1068 1069 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1070 alias = expression.args.get("alias") 1071 if alias: 1072 expression.set("alias", None) 1073 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1074 1075 unnest_sql = super().unnest_sql(expression) 1076 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1077 return self.sql(select) 1078 1079 return super().unnest_sql(expression)
1081 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1082 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 1083 # DuckDB should render IGNORE NULLS only for the general-purpose 1084 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1085 return super().ignorenulls_sql(expression) 1086 1087 return self.sql(expression, "this")
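An illustrative sketch: IGNORE NULLS survives for the window functions listed in WINDOW_FUNCS_WITH_IGNORE_NULLS and is dropped for anything else.

import sqlglot

# LAST_VALUE accepts IGNORE NULLS, so it is preserved in the DuckDB output.
print(sqlglot.transpile("SELECT LAST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t", read="bigquery", write="duckdb")[0])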
1089 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1090 this = self.sql(expression, "this") 1091 null_text = self.sql(expression, "null") 1092 1093 if null_text: 1094 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1095 1096 return self.func("ARRAY_TO_STRING", this, expression.expression)
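A sketch of the NULL-replacement emulation (the BigQuery read dialect supplies the three-argument form):

import sqlglot

# Expected to print roughly:
# SELECT ARRAY_TO_STRING(LIST_TRANSFORM(a, x -> COALESCE(x, 'NA')), ',')
print(sqlglot.transpile("SELECT ARRAY_TO_STRING(a, ',', 'NA')", read="bigquery", write="duckdb")[0])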
1098 @unsupported_args("position", "occurrence") 1099 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1100 group = expression.args.get("group") 1101 params = expression.args.get("parameters") 1102 1103 # Do not render group if there is no following argument, 1104 # and it's the default value for this dialect 1105 if ( 1106 not params 1107 and group 1108 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1109 ): 1110 group = None 1111 return self.func( 1112 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1113 )
@unsupported_args("culture")
def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
    fmt = expression.args.get("format")
    if fmt and fmt.is_int:
        return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

    self.unsupported("Only integer formats are supported by NumberToStr")
    return self.function_fallback_sql(expression)
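A sketch of the integer-format path, assuming MySQL's FORMAT(n, d) parses to exp.NumberToStr (the numbers are arbitrary; output may vary by version):

import sqlglot

# An integer format of 2 should map to DuckDB's fmt-style FORMAT with a
# thousands separator and two decimal places, e.g. FORMAT('{:,.2f}', ...)
print(sqlglot.transpile(
    "SELECT FORMAT(12345.678, 2)",
    read="mysql",
    write="duckdb",
)[0])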
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- put_sql