sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    Version,
    approx_count_distinct_sql,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    count_if_to_sum,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    strposition_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]

WINDOW_FUNCS_WITH_IGNORE_NULLS = (
    exp.FirstValue,
    exp.LastValue,
    exp.Lag,
    exp.Lead,
    exp.NthValue,
)


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    expr = expression.expression
    interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit)

    return f"{self.sql(this)} {op} {self.sql(interval)}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)
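
# Illustrative sketch (not part of the module): `_date_delta_sql` renders date
# arithmetic as `<expr> +/- <interval>`, so a BigQuery DATE_ADD is expected to
# come out roughly as follows (output shown is an approximation):
#
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT DATE_ADD(d, INTERVAL 1 DAY)", read="bigquery", write="duckdb")
#   ["SELECT d + INTERVAL '1' DAY"]
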
def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )
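
# Illustrative sketch (not part of the module): DuckDB's RANGE is end-exclusive
# while GENERATE_SERIES is end-inclusive, a distinction `_build_generate_series`
# records on the AST via the `is_end_exclusive` flag:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("RANGE(0, 10)", read="duckdb").args.get("is_end_exclusive")
#   True
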
def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    #  1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    #  2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))
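
# Illustrative sketch (not part of the module): `_struct_sql` uses DuckDB's brace
# syntax for keyed structs, so BigQuery's STRUCT(1 AS a) is expected to become a
# struct literal (output shown is an approximation):
#
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT STRUCT(1 AS a)", read="bigquery", write="duckdb")
#   ["SELECT {'a': 1}"]
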
WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
    return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))
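
# Illustrative sketch (not part of the module): `_arrow_json_extract_sql` above
# parenthesizes the arrow extraction when it sits under a binary operator so
# precedence survives the rewrite (output shown is an approximation):
#
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT JSON_EXTRACT(j, '$.x') = 1", read="sqlite", write="duckdb")
#   ["SELECT (j -> '$.x') = 1"]
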
class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "DETACH": TokenType.DETACH,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }
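
    # Illustrative sketch (not part of the module): the keyword table above folds
    # several type aliases into a single token, so STRING, VARCHAR and CHAR all
    # round-trip as TEXT (output shown is an approximation):
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("CAST(x AS STRING)", read="duckdb", write="duckdb")
    #   ['CAST(x AS TEXT)']
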
    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "GENERATE_SERIES": _build_generate_series(),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "RANGE": _build_generate_series(end_exclusive=True),
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TIME_BUCKET": exp.DateBin.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            **dict.fromkeys(
                ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
            ),
        }
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ATTACH: lambda self: self._parse_attach_detach(),
            TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
        }

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. foo: 1
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
                self._match(TokenType.COLON)
                comments = self._prev_comments or []

                this = self._parse_assignment()
                if isinstance(this, exp.Expression):
                    # Moves the comment next to the alias in `alias: expr /* comment */`
                    comments += this.pop_comments() or []

                return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            return super()._parse_expression()
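
        # Illustrative sketch (not part of the module): the prefix-alias handling
        # above means `foo: 1` is parsed into an ordinary alias (output shown is
        # an approximation):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT foo: 1", read="duckdb", write="duckdb")
        #   ['SELECT 1 AS foo']
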
        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. FROM foo: bar
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                )
                self._match(TokenType.COLON)
                comments = self._prev_comments or []
            else:
                alias = None
                comments = []

            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
                # Moves the comment next to the alias in `alias: table /* comment */`
                comments += table.pop_comments() or []
                alias.comments = alias.pop_comments() + comments
                table.set("alias", alias)

            return table

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
                # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

        def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )
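
    # Illustrative sketch (not part of the module): ATTACH/DETACH statements are
    # parsed into dedicated expression nodes by `_parse_attach_detach` above:
    #
    #   >>> import sqlglot
    #   >>> type(sqlglot.parse_one("ATTACH DATABASE 'file.db' AS db", read="duckdb")).__name__
    #   'Attach'
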
    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False
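
        # Illustrative sketch (not part of the module): STAR_EXCEPT = "EXCLUDE"
        # above means BigQuery's star-except projection is rendered with DuckDB's
        # keyword (output shown is an approximation):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT * EXCEPT (x) FROM t", read="bigquery", write="duckdb")
        #   ['SELECT * EXCLUDE (x) FROM t']
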
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpILike: lambda self, e: self.func(
                "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
            ),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: strposition_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
            exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
            exp.DateBin: rename_func("TIME_BUCKET"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }
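
        # Illustrative sketch (not part of the module): one TRANSFORMS entry in
        # action — exp.RegexpILike is rewritten to REGEXP_MATCHES with the 'i'
        # modifier (output shown is an approximation):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT x ~* 'abc'", read="postgres", write="duckdb")
        #   ["SELECT REGEXP_MATCHES(x, 'abc', 'i')"]
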
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.JSONB: "JSON",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS
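
        # Illustrative sketch (not part of the module): TYPE_MAPPING above folds
        # types DuckDB lacks into supported ones, e.g. JSONB -> JSON (output shown
        # is an approximation):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("CAST(x AS JSONB)", read="postgres", write="duckdb")
        #   ['CAST(x AS JSON)']
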
        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)
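
        # Illustrative sketch (not part of the module): `interval_sql` above
        # rewrites WEEK and QUARTER intervals into day multiples (output shown is
        # an approximation):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT INTERVAL 2 WEEK", read="bigquery", write="duckdb")
        #   ["SELECT (7 * INTERVAL '2' DAY)"]
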
        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def countif_sql(self, expression: exp.CountIf) -> str:
            if self.dialect.version >= Version("1.2"):
                return self.function_fallback_sql(expression)

            # https://github.com/tobymao/sqlglot/pull/4749
            return count_if_to_sum(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if self.dialect.version >= Version("1.2"):
                return super().bracket_sql(expression)

            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)
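
        # Illustrative sketch (not part of the module): `generateseries_sql` above
        # preserves the end-exclusive semantics recorded at parse time, so RANGE
        # round-trips as RANGE instead of GENERATE_SERIES:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT RANGE(0, 10)", read="duckdb", write="duckdb")
        #   ['SELECT RANGE(0, 10)']
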
        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)

        def autoincrementcolumnconstraint_sql(self, _) -> str:
            self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
            return ""
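
A minimal usage sketch (illustrative, not part of the module): the DuckDB dialect registers itself under the name "duckdb", so it is exercised through the top-level sqlglot API.

import sqlglot

# Parse DuckDB SQL into an AST and render it back with the DuckDB generator.
ast = sqlglot.parse_one("SELECT EPOCH_MS(1618088028295)", read="duckdb")
print(ast.sql(dialect="duckdb"))  # SELECT EPOCH_MS(1618088028295)

# Transpile BigQuery SQL into DuckDB SQL via the transforms defined above;
# the expected output (approximately) is: SELECT COUNT_IF(x > 0)
print(sqlglot.transpile("SELECT COUNTIF(x > 0)", read="bigquery", write="duckdb")[0])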
return super().columndef_sql(expression, sep) 948 949 def join_sql(self, expression: exp.Join) -> str: 950 if ( 951 expression.side == "LEFT" 952 and not expression.args.get("on") 953 and isinstance(expression.this, exp.Unnest) 954 ): 955 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 956 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 957 return super().join_sql(expression.on(exp.true())) 958 959 return super().join_sql(expression) 960 961 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 962 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 963 if expression.args.get("is_end_exclusive"): 964 return rename_func("RANGE")(self, expression) 965 966 return self.function_fallback_sql(expression) 967 968 def countif_sql(self, expression: exp.CountIf) -> str: 969 if self.dialect.version >= Version("1.2"): 970 return self.function_fallback_sql(expression) 971 972 # https://github.com/tobymao/sqlglot/pull/4749 973 return count_if_to_sum(self, expression) 974 975 def bracket_sql(self, expression: exp.Bracket) -> str: 976 if self.dialect.version >= Version("1.2"): 977 return super().bracket_sql(expression) 978 979 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 980 this = expression.this 981 if isinstance(this, exp.Array): 982 this.replace(exp.paren(this)) 983 984 bracket = super().bracket_sql(expression) 985 986 if not expression.args.get("returns_list_for_maps"): 987 if not this.type: 988 from sqlglot.optimizer.annotate_types import annotate_types 989 990 this = annotate_types(this) 991 992 if this.is_type(exp.DataType.Type.MAP): 993 bracket = f"({bracket})[1]" 994 995 return bracket 996 997 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 998 expression_sql = self.sql(expression, "expression") 999 1000 func = expression.this 1001 if isinstance(func, exp.PERCENTILES): 1002 # Make the order key the first arg and slide the fraction to the right 1003 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1004 order_col = expression.find(exp.Ordered) 1005 if order_col: 1006 func.set("expression", func.this) 1007 func.set("this", order_col.this) 1008 1009 this = self.sql(expression, "this").rstrip(")") 1010 1011 return f"{this}{expression_sql})" 1012 1013 def length_sql(self, expression: exp.Length) -> str: 1014 arg = expression.this 1015 1016 # Dialects like BQ and Snowflake also accept binary values as args, so 1017 # DDB will attempt to infer the type or resort to case/when resolution 1018 if not expression.args.get("binary") or arg.is_string: 1019 return self.func("LENGTH", arg) 1020 1021 if not arg.type: 1022 from sqlglot.optimizer.annotate_types import annotate_types 1023 1024 arg = annotate_types(arg) 1025 1026 if arg.is_type(*exp.DataType.TEXT_TYPES): 1027 return self.func("LENGTH", arg) 1028 1029 # We need these casts to make duckdb's static type checker happy 1030 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1031 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1032 1033 case = ( 1034 exp.case(self.func("TYPEOF", arg)) 1035 .when( 1036 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 1037 ) # anonymous to break length_sql recursion 1038 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1039 ) 1040 1041 return self.sql(case) 1042 1043 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1044 this = expression.this 1045 key = expression.args.get("key") 1046 key_sql = key.name if isinstance(key, exp.Expression) else "" 
1047 value_sql = self.sql(expression, "value") 1048 1049 kv_sql = f"{key_sql} := {value_sql}" 1050 1051 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1052 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1053 if isinstance(this, exp.Struct) and not this.expressions: 1054 return self.func("STRUCT_PACK", kv_sql) 1055 1056 return self.func("STRUCT_INSERT", this, kv_sql) 1057 1058 def unnest_sql(self, expression: exp.Unnest) -> str: 1059 explode_array = expression.args.get("explode_array") 1060 if explode_array: 1061 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1062 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1063 expression.expressions.append( 1064 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1065 ) 1066 1067 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1068 alias = expression.args.get("alias") 1069 if alias: 1070 expression.set("alias", None) 1071 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1072 1073 unnest_sql = super().unnest_sql(expression) 1074 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1075 return self.sql(select) 1076 1077 return super().unnest_sql(expression) 1078 1079 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1080 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 1081 # DuckDB should render IGNORE NULLS only for the general-purpose 1082 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1083 return super().ignorenulls_sql(expression) 1084 1085 return self.sql(expression, "this") 1086 1087 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1088 this = self.sql(expression, "this") 1089 null_text = self.sql(expression, "null") 1090 1091 if null_text: 1092 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1093 1094 return self.func("ARRAY_TO_STRING", this, expression.expression) 1095 1096 @unsupported_args("position", "occurrence") 1097 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1098 group = expression.args.get("group") 1099 params = expression.args.get("parameters") 1100 1101 # Do not render group if there is no following argument, 1102 # and it's the default value for this dialect 1103 if ( 1104 not params 1105 and group 1106 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1107 ): 1108 group = None 1109 return self.func( 1110 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1111 ) 1112 1113 @unsupported_args("culture") 1114 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1115 fmt = expression.args.get("format") 1116 if fmt and fmt.is_int: 1117 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1118 1119 self.unsupported("Only integer formats are supported by NumberToStr") 1120 return self.function_fallback_sql(expression) 1121 1122 def autoincrementcolumnconstraint_sql(self, _) -> str: 1123 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1124 return ""
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
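A minimal sketch of this setting in action, assuming T-SQL as a source dialect whose CONCAT treats NULL arguments as empty strings; the expected output is indicative, not authoritative:

import sqlglot

# T-SQL's CONCAT ignores NULLs while DuckDB's yields NULL, so sqlglot should
# coalesce each argument to '' to preserve the source semantics.
print(sqlglot.transpile("SELECT CONCAT(a, b)", read="tsql", write="duckdb")[0])
# e.g. SELECT CONCAT(COALESCE(a, ''), COALESCE(b, ''))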
Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts e.g. in DuckDB. In dialects which don't support fixed size arrays such as Snowflake, this should be interpreted as a subscript/index operator.
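A short sketch contrasting the two interpretations described above; the printed ASTs are whatever each dialect's parser produces and are not asserted here:

import sqlglot

# DuckDB: x::INT[5] reads as a cast to a fixed-size array type
print(repr(sqlglot.parse_one("SELECT x::INT[5]", read="duckdb").selects[0]))
# Snowflake: the same text should read as a cast to INT followed by a [5] subscript
print(repr(sqlglot.parse_one("SELECT x::INT[5]", read="snowflake").selects[0]))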
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether number literals can include underscores for better readability
Specifies the strategy according to which identifiers should be normalized.
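A hedged sketch of overriding this strategy, assuming the `dialect,key=value` settings-string form accepted by sqlglot together with the normalize_identifiers optimizer rule:

from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

expr = parse_one("SELECT Foo FROM Bar")

# DuckDB defaults to case-insensitive: unquoted identifiers are lowercased
print(normalize_identifiers(expr.copy(), dialect="duckdb").sql("duckdb"))

# With a case-sensitive override, the identifiers should be left untouched
print(
    normalize_identifiers(
        expr.copy(), dialect="duckdb,normalization_strategy=case_sensitive"
    ).sql("duckdb")
)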
294 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 295 if isinstance(path, exp.Literal): 296 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 297 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 298 # This check ensures we'll avoid trying to parse these as JSON paths, which can 299 # either result in a noisy warning or in an invalid representation of the path. 300 path_text = path.name 301 if path_text.startswith("/") or "[#" in path_text: 302 return path 303 304 return super().to_json_path(path)
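A quick sketch of the check above: a '/'-prefixed DuckDB JSON pointer should round-trip as a plain string literal instead of being parsed (and possibly mangled) as a JSONPath:

import sqlglot

print(sqlglot.transpile("SELECT col -> '/a/0' FROM t", read="duckdb", write="duckdb")[0])
# The pointer string survives unchanged, e.g. SELECT col -> '/a/0' FROM t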
306 class Tokenizer(tokens.Tokenizer): 307 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 308 HEREDOC_STRINGS = ["$"] 309 310 HEREDOC_TAG_IS_IDENTIFIER = True 311 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 312 313 KEYWORDS = { 314 **tokens.Tokenizer.KEYWORDS, 315 "//": TokenType.DIV, 316 "**": TokenType.DSTAR, 317 "^@": TokenType.CARET_AT, 318 "@>": TokenType.AT_GT, 319 "<@": TokenType.LT_AT, 320 "ATTACH": TokenType.ATTACH, 321 "BINARY": TokenType.VARBINARY, 322 "BITSTRING": TokenType.BIT, 323 "BPCHAR": TokenType.TEXT, 324 "CHAR": TokenType.TEXT, 325 "CHARACTER VARYING": TokenType.TEXT, 326 "DETACH": TokenType.DETACH, 327 "EXCLUDE": TokenType.EXCEPT, 328 "LOGICAL": TokenType.BOOLEAN, 329 "ONLY": TokenType.ONLY, 330 "PIVOT_WIDER": TokenType.PIVOT, 331 "POSITIONAL": TokenType.POSITIONAL, 332 "SIGNED": TokenType.INT, 333 "STRING": TokenType.TEXT, 334 "SUMMARIZE": TokenType.SUMMARIZE, 335 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 336 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 337 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 338 "TIMESTAMP_US": TokenType.TIMESTAMP, 339 "UBIGINT": TokenType.UBIGINT, 340 "UINTEGER": TokenType.UINT, 341 "USMALLINT": TokenType.USMALLINT, 342 "UTINYINT": TokenType.UTINYINT, 343 "VARCHAR": TokenType.TEXT, 344 } 345 KEYWORDS.pop("/*+") 346 347 SINGLE_TOKENS = { 348 **tokens.Tokenizer.SINGLE_TOKENS, 349 "$": TokenType.PARAMETER, 350 }
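An illustrative sketch of a few of the DuckDB-specific tokens registered above (`//` integer division, `**` exponentiation, `^@` starts-with); the round-trip output is printed rather than asserted:

import sqlglot

for sql in ("SELECT 7 // 2", "SELECT 2 ** 10", "SELECT name ^@ 'a' FROM t"):
    print(sqlglot.parse_one(sql, read="duckdb").sql("duckdb"))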
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
352 class Parser(parser.Parser): 353 BITWISE = { 354 **parser.Parser.BITWISE, 355 TokenType.TILDA: exp.RegexpLike, 356 } 357 BITWISE.pop(TokenType.CARET) 358 359 RANGE_PARSERS = { 360 **parser.Parser.RANGE_PARSERS, 361 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 362 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 363 } 364 365 EXPONENT = { 366 **parser.Parser.EXPONENT, 367 TokenType.CARET: exp.Pow, 368 TokenType.DSTAR: exp.Pow, 369 } 370 371 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 372 373 FUNCTIONS = { 374 **parser.Parser.FUNCTIONS, 375 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 376 "ARRAY_SORT": exp.SortArray.from_arg_list, 377 "DATEDIFF": _build_date_diff, 378 "DATE_DIFF": _build_date_diff, 379 "DATE_TRUNC": date_trunc_to_time, 380 "DATETRUNC": date_trunc_to_time, 381 "DECODE": lambda args: exp.Decode( 382 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 383 ), 384 "EDITDIST3": exp.Levenshtein.from_arg_list, 385 "ENCODE": lambda args: exp.Encode( 386 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 387 ), 388 "EPOCH": exp.TimeToUnix.from_arg_list, 389 "EPOCH_MS": lambda args: exp.UnixToTime( 390 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 391 ), 392 "GENERATE_SERIES": _build_generate_series(), 393 "JSON": exp.ParseJSON.from_arg_list, 394 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 395 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 396 "LIST_HAS": exp.ArrayContains.from_arg_list, 397 "LIST_REVERSE_SORT": _build_sort_array_desc, 398 "LIST_SORT": exp.SortArray.from_arg_list, 399 "LIST_VALUE": lambda args: exp.Array(expressions=args), 400 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 401 "MAKE_TIMESTAMP": _build_make_timestamp, 402 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 403 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 404 "RANGE": _build_generate_series(end_exclusive=True), 405 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 406 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 407 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 408 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 409 this=seq_get(args, 0), 410 expression=seq_get(args, 1), 411 replacement=seq_get(args, 2), 412 modifiers=seq_get(args, 3), 413 ), 414 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 415 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 416 "STRING_SPLIT": exp.Split.from_arg_list, 417 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 418 "STRING_TO_ARRAY": exp.Split.from_arg_list, 419 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 420 "STRUCT_PACK": exp.Struct.from_arg_list, 421 "STR_SPLIT": exp.Split.from_arg_list, 422 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 423 "TIME_BUCKET": exp.DateBin.from_arg_list, 424 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 425 "UNNEST": exp.Explode.from_arg_list, 426 "XOR": binary_from_function(exp.BitwiseXor), 427 } 428 429 FUNCTIONS.pop("DATE_SUB") 430 FUNCTIONS.pop("GLOB") 431 432 FUNCTION_PARSERS = { 433 **parser.Parser.FUNCTION_PARSERS, 434 **dict.fromkeys( 435 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 436 ), 437 } 438 FUNCTION_PARSERS.pop("DECODE") 439 440 NO_PAREN_FUNCTION_PARSERS = { 441 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 442 "MAP": lambda self: self._parse_map(), 443 } 444 445 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 446 
TokenType.SEMI, 447 TokenType.ANTI, 448 } 449 450 PLACEHOLDER_PARSERS = { 451 **parser.Parser.PLACEHOLDER_PARSERS, 452 TokenType.PARAMETER: lambda self: ( 453 self.expression(exp.Placeholder, this=self._prev.text) 454 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 455 else None 456 ), 457 } 458 459 TYPE_CONVERTERS = { 460 # https://duckdb.org/docs/sql/data_types/numeric 461 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 462 # https://duckdb.org/docs/sql/data_types/text 463 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 464 } 465 466 STATEMENT_PARSERS = { 467 **parser.Parser.STATEMENT_PARSERS, 468 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 469 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 470 } 471 472 def _parse_expression(self) -> t.Optional[exp.Expression]: 473 # DuckDB supports prefix aliases, e.g. foo: 1 474 if self._next and self._next.token_type == TokenType.COLON: 475 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 476 self._match(TokenType.COLON) 477 comments = self._prev_comments or [] 478 479 this = self._parse_assignment() 480 if isinstance(this, exp.Expression): 481 # Moves the comment next to the alias in `alias: expr /* comment */` 482 comments += this.pop_comments() or [] 483 484 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 485 486 return super()._parse_expression() 487 488 def _parse_table( 489 self, 490 schema: bool = False, 491 joins: bool = False, 492 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 493 parse_bracket: bool = False, 494 is_db_reference: bool = False, 495 parse_partition: bool = False, 496 ) -> t.Optional[exp.Expression]: 497 # DuckDB supports prefix aliases, e.g. 
FROM foo: bar 498 if self._next and self._next.token_type == TokenType.COLON: 499 alias = self._parse_table_alias( 500 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 501 ) 502 self._match(TokenType.COLON) 503 comments = self._prev_comments or [] 504 else: 505 alias = None 506 comments = [] 507 508 table = super()._parse_table( 509 schema=schema, 510 joins=joins, 511 alias_tokens=alias_tokens, 512 parse_bracket=parse_bracket, 513 is_db_reference=is_db_reference, 514 parse_partition=parse_partition, 515 ) 516 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 517 # Moves the comment next to the alias in `alias: table /* comment */` 518 comments += table.pop_comments() or [] 519 alias.comments = alias.pop_comments() + comments 520 table.set("alias", alias) 521 522 return table 523 524 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 525 # https://duckdb.org/docs/sql/samples.html 526 sample = super()._parse_table_sample(as_modifier=as_modifier) 527 if sample and not sample.args.get("method"): 528 if sample.args.get("size"): 529 sample.set("method", exp.var("RESERVOIR")) 530 else: 531 sample.set("method", exp.var("SYSTEM")) 532 533 return sample 534 535 def _parse_bracket( 536 self, this: t.Optional[exp.Expression] = None 537 ) -> t.Optional[exp.Expression]: 538 bracket = super()._parse_bracket(this) 539 540 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 541 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 542 bracket.set("returns_list_for_maps", True) 543 544 return bracket 545 546 def _parse_map(self) -> exp.ToMap | exp.Map: 547 if self._match(TokenType.L_BRACE, advance=False): 548 return self.expression(exp.ToMap, this=self._parse_bracket()) 549 550 args = self._parse_wrapped_csv(self._parse_assignment) 551 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 552 553 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 554 return self._parse_field_def() 555 556 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 557 if len(aggregations) == 1: 558 return super()._pivot_column_names(aggregations) 559 return pivot_column_names(aggregations, dialect="duckdb") 560 561 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 562 def _parse_attach_option() -> exp.AttachOption: 563 return self.expression( 564 exp.AttachOption, 565 this=self._parse_var(any_token=True), 566 expression=self._parse_field(any_token=True), 567 ) 568 569 self._match(TokenType.DATABASE) 570 exists = self._parse_exists(not_=is_attach) 571 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 572 573 if self._match(TokenType.L_PAREN, advance=False): 574 expressions = self._parse_wrapped_csv(_parse_attach_option) 575 else: 576 expressions = None 577 578 return ( 579 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 580 if is_attach 581 else self.expression(exp.Detach, this=this, exists=exists) 582 )
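A brief sketch of two of the parser extensions above: DuckDB's prefix aliases (`alias: expr`) and the ATTACH statement; the file and alias names are hypothetical:

import sqlglot

# Prefix aliases normalize to ordinary aliases, e.g. SELECT 1 + 1 AS total
print(sqlglot.parse_one("SELECT total: 1 + 1", read="duckdb").sql("duckdb"))

# ATTACH parses into an exp.Attach node rather than a bare command
print(repr(sqlglot.parse_one("ATTACH 'file.db' AS file_db", read="duckdb")))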
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
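A hedged sketch of these arguments, assuming sqlglot.parse forwards them to the Parser; with ErrorLevel.RAISE, accumulated errors surface as a single ParseError:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    sqlglot.parse("SELECT 1 +", read="duckdb", error_level=ErrorLevel.RAISE, max_errors=1)
except ParseError as e:
    print(e)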
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
584 class Generator(generator.Generator): 585 PARAMETER_TOKEN = "$" 586 NAMED_PLACEHOLDER_TOKEN = "$" 587 JOIN_HINTS = False 588 TABLE_HINTS = False 589 QUERY_HINTS = False 590 LIMIT_FETCH = "LIMIT" 591 STRUCT_DELIMITER = ("(", ")") 592 RENAME_TABLE_WITH_DB = False 593 NVL2_SUPPORTED = False 594 SEMI_ANTI_JOIN_WITH_SIDE = False 595 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 596 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 597 LAST_DAY_SUPPORTS_DATE_PART = False 598 JSON_KEY_VALUE_PAIR_SEP = "," 599 IGNORE_NULLS_IN_FUNC = True 600 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 601 SUPPORTS_CREATE_TABLE_LIKE = False 602 MULTI_ARG_DISTINCT = False 603 CAN_IMPLEMENT_ARRAY_ANY = True 604 SUPPORTS_TO_NUMBER = False 605 COPY_HAS_INTO_KEYWORD = False 606 STAR_EXCEPT = "EXCLUDE" 607 PAD_FILL_PATTERN_IS_REQUIRED = True 608 ARRAY_CONCAT_IS_VAR_LEN = False 609 ARRAY_SIZE_DIM_REQUIRED = False 610 611 TRANSFORMS = { 612 **generator.Generator.TRANSFORMS, 613 exp.ApproxDistinct: approx_count_distinct_sql, 614 exp.Array: inline_array_unless_query, 615 exp.ArrayFilter: rename_func("LIST_FILTER"), 616 exp.ArraySort: _array_sort_sql, 617 exp.ArraySum: rename_func("LIST_SUM"), 618 exp.BitwiseXor: rename_func("XOR"), 619 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 620 exp.CurrentDate: lambda *_: "CURRENT_DATE", 621 exp.CurrentTime: lambda *_: "CURRENT_TIME", 622 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 623 exp.DayOfMonth: rename_func("DAYOFMONTH"), 624 exp.DayOfWeek: rename_func("DAYOFWEEK"), 625 exp.DayOfWeekIso: rename_func("ISODOW"), 626 exp.DayOfYear: rename_func("DAYOFYEAR"), 627 exp.DataType: _datatype_sql, 628 exp.Date: _date_sql, 629 exp.DateAdd: _date_delta_sql, 630 exp.DateFromParts: rename_func("MAKE_DATE"), 631 exp.DateSub: _date_delta_sql, 632 exp.DateDiff: _date_diff_sql, 633 exp.DateStrToDate: datestrtodate_sql, 634 exp.Datetime: no_datetime_sql, 635 exp.DatetimeSub: _date_delta_sql, 636 exp.DatetimeAdd: _date_delta_sql, 637 exp.DateToDi: lambda self, 638 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 639 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 640 exp.DiToDate: lambda self, 641 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 642 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 643 exp.GenerateDateArray: _generate_datetime_array_sql, 644 exp.GenerateTimestampArray: _generate_datetime_array_sql, 645 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 646 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 647 exp.Explode: rename_func("UNNEST"), 648 exp.IntDiv: lambda self, e: self.binary(e, "//"), 649 exp.IsInf: rename_func("ISINF"), 650 exp.IsNan: rename_func("ISNAN"), 651 exp.JSONBExists: rename_func("JSON_EXISTS"), 652 exp.JSONExtract: _arrow_json_extract_sql, 653 exp.JSONExtractArray: _json_extract_value_array_sql, 654 exp.JSONExtractScalar: _arrow_json_extract_sql, 655 exp.JSONFormat: _json_format_sql, 656 exp.JSONValueArray: _json_extract_value_array_sql, 657 exp.Lateral: explode_to_unnest_sql, 658 exp.LogicalOr: rename_func("BOOL_OR"), 659 exp.LogicalAnd: rename_func("BOOL_AND"), 660 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 661 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 662 exp.MonthsBetween: lambda self, e: self.func( 663 "DATEDIFF", 664 "'month'", 665 exp.cast(e.expression, 
exp.DataType.Type.TIMESTAMP, copy=True), 666 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 667 ), 668 exp.PercentileCont: rename_func("QUANTILE_CONT"), 669 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 670 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 671 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 672 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 673 exp.RegexpReplace: lambda self, e: self.func( 674 "REGEXP_REPLACE", 675 e.this, 676 e.expression, 677 e.args.get("replacement"), 678 e.args.get("modifiers"), 679 ), 680 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 681 exp.RegexpILike: lambda self, e: self.func( 682 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 683 ), 684 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 685 exp.Return: lambda self, e: self.sql(e, "this"), 686 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 687 exp.Rand: rename_func("RANDOM"), 688 exp.SHA: rename_func("SHA1"), 689 exp.SHA2: sha256_sql, 690 exp.Split: rename_func("STR_SPLIT"), 691 exp.SortArray: _sort_array_sql, 692 exp.StrPosition: strposition_sql, 693 exp.StrToUnix: lambda self, e: self.func( 694 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 695 ), 696 exp.Struct: _struct_sql, 697 exp.Transform: rename_func("LIST_TRANSFORM"), 698 exp.TimeAdd: _date_delta_sql, 699 exp.Time: no_time_sql, 700 exp.TimeDiff: _timediff_sql, 701 exp.Timestamp: no_timestamp_sql, 702 exp.TimestampDiff: lambda self, e: self.func( 703 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 704 ), 705 exp.TimestampTrunc: timestamptrunc_sql(), 706 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 707 exp.TimeStrToTime: timestrtotime_sql, 708 exp.TimeStrToUnix: lambda self, e: self.func( 709 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 710 ), 711 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 712 exp.TimeToUnix: rename_func("EPOCH"), 713 exp.TsOrDiToDi: lambda self, 714 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 715 exp.TsOrDsAdd: _date_delta_sql, 716 exp.TsOrDsDiff: lambda self, e: self.func( 717 "DATE_DIFF", 718 f"'{e.args.get('unit') or 'DAY'}'", 719 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 720 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 721 ), 722 exp.UnixToStr: lambda self, e: self.func( 723 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 724 ), 725 exp.DatetimeTrunc: lambda self, e: self.func( 726 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 727 ), 728 exp.UnixToTime: _unix_to_time_sql, 729 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 730 exp.VariancePop: rename_func("VAR_POP"), 731 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 732 exp.Xor: bool_xor_sql, 733 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 734 rename_func("LEVENSHTEIN") 735 ), 736 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 737 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 738 exp.DateBin: rename_func("TIME_BUCKET"), 739 } 740 741 SUPPORTED_JSON_PATH_PARTS = { 742 exp.JSONPathKey, 743 exp.JSONPathRoot, 744 exp.JSONPathSubscript, 745 exp.JSONPathWildcard, 746 } 747 748 TYPE_MAPPING = { 749 **generator.Generator.TYPE_MAPPING, 750 exp.DataType.Type.BINARY: "BLOB", 
751 exp.DataType.Type.BPCHAR: "TEXT", 752 exp.DataType.Type.CHAR: "TEXT", 753 exp.DataType.Type.DATETIME: "TIMESTAMP", 754 exp.DataType.Type.FLOAT: "REAL", 755 exp.DataType.Type.JSONB: "JSON", 756 exp.DataType.Type.NCHAR: "TEXT", 757 exp.DataType.Type.NVARCHAR: "TEXT", 758 exp.DataType.Type.UINT: "UINTEGER", 759 exp.DataType.Type.VARBINARY: "BLOB", 760 exp.DataType.Type.ROWVERSION: "BLOB", 761 exp.DataType.Type.VARCHAR: "TEXT", 762 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 763 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 764 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 765 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 766 } 767 768 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 769 RESERVED_KEYWORDS = { 770 "array", 771 "analyse", 772 "union", 773 "all", 774 "when", 775 "in_p", 776 "default", 777 "create_p", 778 "window", 779 "asymmetric", 780 "to", 781 "else", 782 "localtime", 783 "from", 784 "end_p", 785 "select", 786 "current_date", 787 "foreign", 788 "with", 789 "grant", 790 "session_user", 791 "or", 792 "except", 793 "references", 794 "fetch", 795 "limit", 796 "group_p", 797 "leading", 798 "into", 799 "collate", 800 "offset", 801 "do", 802 "then", 803 "localtimestamp", 804 "check_p", 805 "lateral_p", 806 "current_role", 807 "where", 808 "asc_p", 809 "placing", 810 "desc_p", 811 "user", 812 "unique", 813 "initially", 814 "column", 815 "both", 816 "some", 817 "as", 818 "any", 819 "only", 820 "deferrable", 821 "null_p", 822 "current_time", 823 "true_p", 824 "table", 825 "case", 826 "trailing", 827 "variadic", 828 "for", 829 "on", 830 "distinct", 831 "false_p", 832 "not", 833 "constraint", 834 "current_timestamp", 835 "returning", 836 "primary", 837 "intersect", 838 "having", 839 "analyze", 840 "current_user", 841 "and", 842 "cast", 843 "symmetric", 844 "using", 845 "order", 846 "current_catalog", 847 } 848 849 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 850 851 # DuckDB doesn't generally support CREATE TABLE .. properties 852 # https://duckdb.org/docs/sql/statements/create_table.html 853 PROPERTIES_LOCATION = { 854 prop: exp.Properties.Location.UNSUPPORTED 855 for prop in generator.Generator.PROPERTIES_LOCATION 856 } 857 858 # There are a few exceptions (e.g. 
temporary tables) which are supported or 859 # can be transpiled to DuckDB, so we explicitly override them accordingly 860 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 861 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 862 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 863 864 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 865 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 866 867 def strtotime_sql(self, expression: exp.StrToTime) -> str: 868 if expression.args.get("safe"): 869 formatted_time = self.format_time(expression) 870 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 871 return str_to_time_sql(self, expression) 872 873 def strtodate_sql(self, expression: exp.StrToDate) -> str: 874 if expression.args.get("safe"): 875 formatted_time = self.format_time(expression) 876 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 877 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 878 879 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 880 arg = expression.this 881 if expression.args.get("safe"): 882 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 883 return self.func("JSON", arg) 884 885 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 886 nano = expression.args.get("nano") 887 if nano is not None: 888 expression.set( 889 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 890 ) 891 892 return rename_func("MAKE_TIME")(self, expression) 893 894 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 895 sec = expression.args["sec"] 896 897 milli = expression.args.get("milli") 898 if milli is not None: 899 sec += milli.pop() / exp.Literal.number(1000.0) 900 901 nano = expression.args.get("nano") 902 if nano is not None: 903 sec += nano.pop() / exp.Literal.number(1000000000.0) 904 905 if milli or nano: 906 expression.set("sec", sec) 907 908 return rename_func("MAKE_TIMESTAMP")(self, expression) 909 910 def tablesample_sql( 911 self, 912 expression: exp.TableSample, 913 tablesample_keyword: t.Optional[str] = None, 914 ) -> str: 915 if not isinstance(expression.parent, exp.Select): 916 # This sample clause only applies to a single source, not the entire resulting relation 917 tablesample_keyword = "TABLESAMPLE" 918 919 if expression.args.get("size"): 920 method = expression.args.get("method") 921 if method and method.name.upper() != "RESERVOIR": 922 self.unsupported( 923 f"Sampling method {method} is not supported with a discrete sample count, " 924 "defaulting to reservoir sampling" 925 ) 926 expression.set("method", exp.var("RESERVOIR")) 927 928 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 929 930 def interval_sql(self, expression: exp.Interval) -> str: 931 multiplier: t.Optional[int] = None 932 unit = expression.text("unit").lower() 933 934 if unit.startswith("week"): 935 multiplier = 7 936 if unit.startswith("quarter"): 937 multiplier = 90 938 939 if multiplier: 940 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 941 942 return super().interval_sql(expression) 943 944 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 945 if isinstance(expression.parent, exp.UserDefinedFunction): 946 return self.sql(expression, "this") 947 
return super().columndef_sql(expression, sep) 948 949 def join_sql(self, expression: exp.Join) -> str: 950 if ( 951 expression.side == "LEFT" 952 and not expression.args.get("on") 953 and isinstance(expression.this, exp.Unnest) 954 ): 955 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 956 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 957 return super().join_sql(expression.on(exp.true())) 958 959 return super().join_sql(expression) 960 961 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 962 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 963 if expression.args.get("is_end_exclusive"): 964 return rename_func("RANGE")(self, expression) 965 966 return self.function_fallback_sql(expression) 967 968 def countif_sql(self, expression: exp.CountIf) -> str: 969 if self.dialect.version >= Version("1.2"): 970 return self.function_fallback_sql(expression) 971 972 # https://github.com/tobymao/sqlglot/pull/4749 973 return count_if_to_sum(self, expression) 974 975 def bracket_sql(self, expression: exp.Bracket) -> str: 976 if self.dialect.version >= Version("1.2"): 977 return super().bracket_sql(expression) 978 979 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 980 this = expression.this 981 if isinstance(this, exp.Array): 982 this.replace(exp.paren(this)) 983 984 bracket = super().bracket_sql(expression) 985 986 if not expression.args.get("returns_list_for_maps"): 987 if not this.type: 988 from sqlglot.optimizer.annotate_types import annotate_types 989 990 this = annotate_types(this) 991 992 if this.is_type(exp.DataType.Type.MAP): 993 bracket = f"({bracket})[1]" 994 995 return bracket 996 997 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 998 expression_sql = self.sql(expression, "expression") 999 1000 func = expression.this 1001 if isinstance(func, exp.PERCENTILES): 1002 # Make the order key the first arg and slide the fraction to the right 1003 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1004 order_col = expression.find(exp.Ordered) 1005 if order_col: 1006 func.set("expression", func.this) 1007 func.set("this", order_col.this) 1008 1009 this = self.sql(expression, "this").rstrip(")") 1010 1011 return f"{this}{expression_sql})" 1012 1013 def length_sql(self, expression: exp.Length) -> str: 1014 arg = expression.this 1015 1016 # Dialects like BQ and Snowflake also accept binary values as args, so 1017 # DDB will attempt to infer the type or resort to case/when resolution 1018 if not expression.args.get("binary") or arg.is_string: 1019 return self.func("LENGTH", arg) 1020 1021 if not arg.type: 1022 from sqlglot.optimizer.annotate_types import annotate_types 1023 1024 arg = annotate_types(arg) 1025 1026 if arg.is_type(*exp.DataType.TEXT_TYPES): 1027 return self.func("LENGTH", arg) 1028 1029 # We need these casts to make duckdb's static type checker happy 1030 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1031 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1032 1033 case = ( 1034 exp.case(self.func("TYPEOF", arg)) 1035 .when( 1036 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 1037 ) # anonymous to break length_sql recursion 1038 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1039 ) 1040 1041 return self.sql(case) 1042 1043 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1044 this = expression.this 1045 key = expression.args.get("key") 1046 key_sql = key.name if isinstance(key, exp.Expression) else "" 
1047 value_sql = self.sql(expression, "value") 1048 1049 kv_sql = f"{key_sql} := {value_sql}" 1050 1051 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1052 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1053 if isinstance(this, exp.Struct) and not this.expressions: 1054 return self.func("STRUCT_PACK", kv_sql) 1055 1056 return self.func("STRUCT_INSERT", this, kv_sql) 1057 1058 def unnest_sql(self, expression: exp.Unnest) -> str: 1059 explode_array = expression.args.get("explode_array") 1060 if explode_array: 1061 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1062 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1063 expression.expressions.append( 1064 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1065 ) 1066 1067 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1068 alias = expression.args.get("alias") 1069 if alias: 1070 expression.set("alias", None) 1071 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1072 1073 unnest_sql = super().unnest_sql(expression) 1074 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1075 return self.sql(select) 1076 1077 return super().unnest_sql(expression) 1078 1079 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1080 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 1081 # DuckDB should render IGNORE NULLS only for the general-purpose 1082 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1083 return super().ignorenulls_sql(expression) 1084 1085 return self.sql(expression, "this") 1086 1087 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1088 this = self.sql(expression, "this") 1089 null_text = self.sql(expression, "null") 1090 1091 if null_text: 1092 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1093 1094 return self.func("ARRAY_TO_STRING", this, expression.expression) 1095 1096 @unsupported_args("position", "occurrence") 1097 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1098 group = expression.args.get("group") 1099 params = expression.args.get("parameters") 1100 1101 # Do not render group if there is no following argument, 1102 # and it's the default value for this dialect 1103 if ( 1104 not params 1105 and group 1106 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1107 ): 1108 group = None 1109 return self.func( 1110 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1111 ) 1112 1113 @unsupported_args("culture") 1114 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1115 fmt = expression.args.get("format") 1116 if fmt and fmt.is_int: 1117 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1118 1119 self.unsupported("Only integer formats are supported by NumberToStr") 1120 return self.function_fallback_sql(expression) 1121 1122 def autoincrementcolumnconstraint_sql(self, _) -> str: 1123 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1124 return ""
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
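A short sketch of a few of these arguments, passed through Expression.sql to the DuckDB generator:

import sqlglot

expr = sqlglot.parse_one("select a, b from t where a > 1", read="duckdb")

# Pretty-print, quote all identifiers, and lowercase-normalize them
print(expr.sql(dialect="duckdb", pretty=True, identify=True, normalize=True))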
873 def strtodate_sql(self, expression: exp.StrToDate) -> str: 874 if expression.args.get("safe"): 875 formatted_time = self.format_time(expression) 876 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 877 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
885 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 886 nano = expression.args.get("nano") 887 if nano is not None: 888 expression.set( 889 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 890 ) 891 892 return rename_func("MAKE_TIME")(self, expression)
894 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 895 sec = expression.args["sec"] 896 897 milli = expression.args.get("milli") 898 if milli is not None: 899 sec += milli.pop() / exp.Literal.number(1000.0) 900 901 nano = expression.args.get("nano") 902 if nano is not None: 903 sec += nano.pop() / exp.Literal.number(1000000000.0) 904 905 if milli or nano: 906 expression.set("sec", sec) 907 908 return rename_func("MAKE_TIMESTAMP")(self, expression)
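A hedged sketch of this method from the Snowflake side, whose TIMESTAMP_FROM_PARTS is assumed to map onto exp.TimestampFromParts; the output shown is approximate:

import sqlglot

print(sqlglot.transpile(
    "SELECT TIMESTAMP_FROM_PARTS(2024, 1, 2, 3, 4, 5)",
    read="snowflake",
    write="duckdb",
)[0])
# e.g. SELECT MAKE_TIMESTAMP(2024, 1, 2, 3, 4, 5)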
910 def tablesample_sql( 911 self, 912 expression: exp.TableSample, 913 tablesample_keyword: t.Optional[str] = None, 914 ) -> str: 915 if not isinstance(expression.parent, exp.Select): 916 # This sample clause only applies to a single source, not the entire resulting relation 917 tablesample_keyword = "TABLESAMPLE" 918 919 if expression.args.get("size"): 920 method = expression.args.get("method") 921 if method and method.name.upper() != "RESERVOIR": 922 self.unsupported( 923 f"Sampling method {method} is not supported with a discrete sample count, " 924 "defaulting to reservoir sampling" 925 ) 926 expression.set("method", exp.var("RESERVOIR")) 927 928 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
930 def interval_sql(self, expression: exp.Interval) -> str: 931 multiplier: t.Optional[int] = None 932 unit = expression.text("unit").lower() 933 934 if unit.startswith("week"): 935 multiplier = 7 936 if unit.startswith("quarter"): 937 multiplier = 90 938 939 if multiplier: 940 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 941 942 return super().interval_sql(expression)
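A minimal sketch of the WEEK/QUARTER rewrite above, which lowers those units into day arithmetic:

import sqlglot

print(sqlglot.parse_one("INTERVAL '2' WEEK").sql("duckdb"))     # (7 * INTERVAL '2' DAY)
print(sqlglot.parse_one("INTERVAL '1' QUARTER").sql("duckdb"))  # (90 * INTERVAL '1' DAY)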
949 def join_sql(self, expression: exp.Join) -> str: 950 if ( 951 expression.side == "LEFT" 952 and not expression.args.get("on") 953 and isinstance(expression.this, exp.Unnest) 954 ): 955 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 956 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 957 return super().join_sql(expression.on(exp.true())) 958 959 return super().join_sql(expression)
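A sketch of the ON-clause fix-up above, reading a BigQuery-style LEFT JOIN UNNEST that has no join condition; the DuckDB output should gain a dummy ON TRUE:

import sqlglot

print(sqlglot.transpile(
    "SELECT * FROM t LEFT JOIN UNNEST(t.arr) AS a",
    read="bigquery",
    write="duckdb",
)[0])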
975 def bracket_sql(self, expression: exp.Bracket) -> str: 976 if self.dialect.version >= Version("1.2"): 977 return super().bracket_sql(expression) 978 979 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 980 this = expression.this 981 if isinstance(this, exp.Array): 982 this.replace(exp.paren(this)) 983 984 bracket = super().bracket_sql(expression) 985 986 if not expression.args.get("returns_list_for_maps"): 987 if not this.type: 988 from sqlglot.optimizer.annotate_types import annotate_types 989 990 this = annotate_types(this) 991 992 if this.is_type(exp.DataType.Type.MAP): 993 bracket = f"({bracket})[1]" 994 995 return bracket
997 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 998 expression_sql = self.sql(expression, "expression") 999 1000 func = expression.this 1001 if isinstance(func, exp.PERCENTILES): 1002 # Make the order key the first arg and slide the fraction to the right 1003 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1004 order_col = expression.find(exp.Ordered) 1005 if order_col: 1006 func.set("expression", func.this) 1007 func.set("this", order_col.this) 1008 1009 this = self.sql(expression, "this").rstrip(")") 1010 1011 return f"{this}{expression_sql})"
1013 def length_sql(self, expression: exp.Length) -> str: 1014 arg = expression.this 1015 1016 # Dialects like BQ and Snowflake also accept binary values as args, so 1017 # DDB will attempt to infer the type or resort to case/when resolution 1018 if not expression.args.get("binary") or arg.is_string: 1019 return self.func("LENGTH", arg) 1020 1021 if not arg.type: 1022 from sqlglot.optimizer.annotate_types import annotate_types 1023 1024 arg = annotate_types(arg) 1025 1026 if arg.is_type(*exp.DataType.TEXT_TYPES): 1027 return self.func("LENGTH", arg) 1028 1029 # We need these casts to make duckdb's static type checker happy 1030 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1031 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1032 1033 case = ( 1034 exp.case(self.func("TYPEOF", arg)) 1035 .when( 1036 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 1037 ) # anonymous to break length_sql recursion 1038 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1039 ) 1040 1041 return self.sql(case)
1043 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1044 this = expression.this 1045 key = expression.args.get("key") 1046 key_sql = key.name if isinstance(key, exp.Expression) else "" 1047 value_sql = self.sql(expression, "value") 1048 1049 kv_sql = f"{key_sql} := {value_sql}" 1050 1051 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1052 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1053 if isinstance(this, exp.Struct) and not this.expressions: 1054 return self.func("STRUCT_PACK", kv_sql) 1055 1056 return self.func("STRUCT_INSERT", this, kv_sql)
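A sketch of the empty-struct special case noted in the comment above, transpiling Snowflake's OBJECT_INSERT on a fresh OBJECT_CONSTRUCT():

import sqlglot

print(sqlglot.transpile(
    "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1)",
    read="snowflake",
    write="duckdb",
)[0])
# e.g. SELECT STRUCT_PACK(k := 1)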
1058 def unnest_sql(self, expression: exp.Unnest) -> str: 1059 explode_array = expression.args.get("explode_array") 1060 if explode_array: 1061 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1062 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1063 expression.expressions.append( 1064 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1065 ) 1066 1067 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1068 alias = expression.args.get("alias") 1069 if alias: 1070 expression.set("alias", None) 1071 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1072 1073 unnest_sql = super().unnest_sql(expression) 1074 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1075 return self.sql(select) 1076 1077 return super().unnest_sql(expression)
1079 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1080 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 1081 # DuckDB should render IGNORE NULLS only for the general-purpose 1082 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1083 return super().ignorenulls_sql(expression) 1084 1085 return self.sql(expression, "this")
1087 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1088 this = self.sql(expression, "this") 1089 null_text = self.sql(expression, "null") 1090 1091 if null_text: 1092 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1093 1094 return self.func("ARRAY_TO_STRING", this, expression.expression)
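A hedged sketch of the NULL-replacement branch above, using BigQuery's three-argument ARRAY_TO_STRING as the source; the output is approximate:

import sqlglot

print(sqlglot.transpile(
    "SELECT ARRAY_TO_STRING(arr, ',', 'n/a')",
    read="bigquery",
    write="duckdb",
)[0])
# e.g. SELECT ARRAY_TO_STRING(LIST_TRANSFORM(arr, x -> COALESCE(x, 'n/a')), ',')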
1096 @unsupported_args("position", "occurrence") 1097 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1098 group = expression.args.get("group") 1099 params = expression.args.get("parameters") 1100 1101 # Do not render group if there is no following argument, 1102 # and it's the default value for this dialect 1103 if ( 1104 not params 1105 and group 1106 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1107 ): 1108 group = None 1109 return self.func( 1110 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1111 )
@unsupported_args("culture")
def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
    fmt = expression.args.get("format")
    if fmt and fmt.is_int:
        return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

    self.unsupported("Only integer formats are supported by NumberToStr")
    return self.function_fallback_sql(expression)
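A minimal sketch of the integer-format branch, constructing the exp.NumberToStr node directly:

from sqlglot import exp
from sqlglot.dialects.duckdb import DuckDB

# An integer format (here 2 decimal places) maps onto a Python-style format
# spec with a thousands separator; non-integer formats fall back with a warning.
num = exp.NumberToStr(this=exp.Literal.number(1234567.891), format=exp.Literal.number(2))
print(DuckDB().generate(num))  # expected: FORMAT('{:,.2f}', 1234567.891)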