sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    strposition_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]

WINDOW_FUNCS_WITH_IGNORE_NULLS = (
    exp.FirstValue,
    exp.LastValue,
    exp.Lag,
    exp.Lead,
    exp.NthValue,
)


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)
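
# Editor's note (illustrative, not part of the original module): _date_delta_sql
# above renders date/time deltas as `expr +/- INTERVAL`, casting string literals
# first so the arithmetic type-checks in DuckDB. A minimal sketch, assuming a
# recent sqlglot version (output indicative):
#
#   import sqlglot
#   sqlglot.transpile(
#       "SELECT DATE_ADD('2020-01-01', INTERVAL 1 DAY)", read="bigquery", write="duckdb"
#   )
#   # -> ["SELECT CAST('2020-01-01' AS DATE) + INTERVAL '1' DAY"]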


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    #  1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    #  2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"
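
# Editor's note (illustrative, not part of the original module): _struct_sql above
# chooses between DuckDB's two struct spellings depending on whether the input has
# named fields. A minimal sketch, assuming a recent sqlglot version (output
# indicative):
#
#   import sqlglot
#   sqlglot.transpile("SELECT STRUCT(1 AS a)", read="bigquery", write="duckdb")
#   # -> ["SELECT {'a': 1}"]
#
# while a BigQuery inline-typed STRUCT under a cast is rendered as ROW(...).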
self.func("EPOCH_MS", timestamp) 212 if scale == exp.UnixToTime.MICROS: 213 return self.func("MAKE_TIMESTAMP", timestamp) 214 215 return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale))) 216 217 218WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In) 219 220 221def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str: 222 arrow_sql = arrow_json_extract_sql(self, expression) 223 if not expression.same_parent and isinstance( 224 expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS 225 ): 226 arrow_sql = self.wrap(arrow_sql) 227 return arrow_sql 228 229 230def _implicit_datetime_cast( 231 arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE 232) -> t.Optional[exp.Expression]: 233 return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg 234 235 236def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str: 237 this = _implicit_datetime_cast(expression.this) 238 expr = _implicit_datetime_cast(expression.expression) 239 240 return self.func("DATE_DIFF", unit_to_str(expression), expr, this) 241 242 243def _generate_datetime_array_sql( 244 self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray] 245) -> str: 246 is_generate_date_array = isinstance(expression, exp.GenerateDateArray) 247 248 type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP 249 start = _implicit_datetime_cast(expression.args.get("start"), type=type) 250 end = _implicit_datetime_cast(expression.args.get("end"), type=type) 251 252 # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB'S GENERATE_SERIES 253 gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries( 254 start=start, end=end, step=expression.args.get("step") 255 ) 256 257 if is_generate_date_array: 258 # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for 259 # GENERATE_DATE_ARRAY we must cast it back to DATE array 260 gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>")) 261 262 return self.sql(gen_series) 263 264 265def _json_extract_value_array_sql( 266 self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray 267) -> str: 268 json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression) 269 data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>" 270 return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type))) 271 272 273class DuckDB(Dialect): 274 NULL_ORDERING = "nulls_are_last" 275 SUPPORTS_USER_DEFINED_TYPES = False 276 SAFE_DIVISION = True 277 INDEX_OFFSET = 1 278 CONCAT_COALESCE = True 279 SUPPORTS_ORDER_BY_ALL = True 280 SUPPORTS_FIXED_SIZE_ARRAYS = True 281 STRICT_JSON_PATH_SYNTAX = False 282 283 # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table 284 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 285 286 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 287 if isinstance(path, exp.Literal): 288 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 289 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 290 # This check ensures we'll avoid trying to parse these as JSON paths, which can 291 # either result in a noisy warning or in an invalid representation of the path. 


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "DETACH": TokenType.DETACH,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}
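
        # Editor's note (illustrative, not part of the original module): with the
        # operator maps above, DuckDB's `^` and `**` both parse into exp.Pow, and
        # the binary `~` is treated as a regex match rather than a bitwise op, e.g.
        #
        #   sqlglot.parse_one("SELECT 2 ** 3", read="duckdb")   # contains exp.Pow
        #   sqlglot.parse_one("SELECT 2 ^ 3", read="duckdb")    # also exp.Pow
        #   sqlglot.parse_one("SELECT a ~ 'x'", read="duckdb")  # exp.RegexpLike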
"REGEXP_REPLACE": lambda args: exp.RegexpReplace( 398 this=seq_get(args, 0), 399 expression=seq_get(args, 1), 400 replacement=seq_get(args, 2), 401 modifiers=seq_get(args, 3), 402 ), 403 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 404 "STRING_SPLIT": exp.Split.from_arg_list, 405 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 406 "STRING_TO_ARRAY": exp.Split.from_arg_list, 407 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 408 "STRUCT_PACK": exp.Struct.from_arg_list, 409 "STR_SPLIT": exp.Split.from_arg_list, 410 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 411 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 412 "UNNEST": exp.Explode.from_arg_list, 413 "XOR": binary_from_function(exp.BitwiseXor), 414 "GENERATE_SERIES": _build_generate_series(), 415 "RANGE": _build_generate_series(end_exclusive=True), 416 "EDITDIST3": exp.Levenshtein.from_arg_list, 417 } 418 419 FUNCTIONS.pop("DATE_SUB") 420 FUNCTIONS.pop("GLOB") 421 422 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 423 FUNCTION_PARSERS.pop("DECODE") 424 425 NO_PAREN_FUNCTION_PARSERS = { 426 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 427 "MAP": lambda self: self._parse_map(), 428 } 429 430 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 431 TokenType.SEMI, 432 TokenType.ANTI, 433 } 434 435 PLACEHOLDER_PARSERS = { 436 **parser.Parser.PLACEHOLDER_PARSERS, 437 TokenType.PARAMETER: lambda self: ( 438 self.expression(exp.Placeholder, this=self._prev.text) 439 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 440 else None 441 ), 442 } 443 444 TYPE_CONVERTERS = { 445 # https://duckdb.org/docs/sql/data_types/numeric 446 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 447 # https://duckdb.org/docs/sql/data_types/text 448 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 449 } 450 451 STATEMENT_PARSERS = { 452 **parser.Parser.STATEMENT_PARSERS, 453 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 454 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 455 } 456 457 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 458 # https://duckdb.org/docs/sql/samples.html 459 sample = super()._parse_table_sample(as_modifier=as_modifier) 460 if sample and not sample.args.get("method"): 461 if sample.args.get("size"): 462 sample.set("method", exp.var("RESERVOIR")) 463 else: 464 sample.set("method", exp.var("SYSTEM")) 465 466 return sample 467 468 def _parse_bracket( 469 self, this: t.Optional[exp.Expression] = None 470 ) -> t.Optional[exp.Expression]: 471 bracket = super()._parse_bracket(this) 472 if isinstance(bracket, exp.Bracket): 473 bracket.set("returns_list_for_maps", True) 474 475 return bracket 476 477 def _parse_map(self) -> exp.ToMap | exp.Map: 478 if self._match(TokenType.L_BRACE, advance=False): 479 return self.expression(exp.ToMap, this=self._parse_bracket()) 480 481 args = self._parse_wrapped_csv(self._parse_assignment) 482 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 483 484 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 485 return self._parse_field_def() 486 487 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 488 if len(aggregations) == 1: 489 return super()._pivot_column_names(aggregations) 490 return pivot_column_names(aggregations, dialect="duckdb") 491 492 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 493 def 
        def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False
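
        # Editor's note (illustrative, not part of the original module): these flags
        # drive dialect-specific rendering. For instance, STAR_EXCEPT = "EXCLUDE"
        # means a BigQuery star-exception list is emitted with DuckDB's keyword
        # (output indicative):
        #
        #   sqlglot.transpile("SELECT * EXCEPT (x) FROM t", read="bigquery", write="duckdb")
        #   # -> ["SELECT * EXCLUDE (x) FROM t"]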

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpILike: lambda self, e: self.func(
                "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
            ),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: strposition_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 663 rename_func("LEVENSHTEIN") 664 ), 665 } 666 667 SUPPORTED_JSON_PATH_PARTS = { 668 exp.JSONPathKey, 669 exp.JSONPathRoot, 670 exp.JSONPathSubscript, 671 exp.JSONPathWildcard, 672 } 673 674 TYPE_MAPPING = { 675 **generator.Generator.TYPE_MAPPING, 676 exp.DataType.Type.BINARY: "BLOB", 677 exp.DataType.Type.BPCHAR: "TEXT", 678 exp.DataType.Type.CHAR: "TEXT", 679 exp.DataType.Type.DATETIME: "TIMESTAMP", 680 exp.DataType.Type.FLOAT: "REAL", 681 exp.DataType.Type.NCHAR: "TEXT", 682 exp.DataType.Type.NVARCHAR: "TEXT", 683 exp.DataType.Type.UINT: "UINTEGER", 684 exp.DataType.Type.VARBINARY: "BLOB", 685 exp.DataType.Type.ROWVERSION: "BLOB", 686 exp.DataType.Type.VARCHAR: "TEXT", 687 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 688 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 689 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 690 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 691 } 692 693 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 694 RESERVED_KEYWORDS = { 695 "array", 696 "analyse", 697 "union", 698 "all", 699 "when", 700 "in_p", 701 "default", 702 "create_p", 703 "window", 704 "asymmetric", 705 "to", 706 "else", 707 "localtime", 708 "from", 709 "end_p", 710 "select", 711 "current_date", 712 "foreign", 713 "with", 714 "grant", 715 "session_user", 716 "or", 717 "except", 718 "references", 719 "fetch", 720 "limit", 721 "group_p", 722 "leading", 723 "into", 724 "collate", 725 "offset", 726 "do", 727 "then", 728 "localtimestamp", 729 "check_p", 730 "lateral_p", 731 "current_role", 732 "where", 733 "asc_p", 734 "placing", 735 "desc_p", 736 "user", 737 "unique", 738 "initially", 739 "column", 740 "both", 741 "some", 742 "as", 743 "any", 744 "only", 745 "deferrable", 746 "null_p", 747 "current_time", 748 "true_p", 749 "table", 750 "case", 751 "trailing", 752 "variadic", 753 "for", 754 "on", 755 "distinct", 756 "false_p", 757 "not", 758 "constraint", 759 "current_timestamp", 760 "returning", 761 "primary", 762 "intersect", 763 "having", 764 "analyze", 765 "current_user", 766 "and", 767 "cast", 768 "symmetric", 769 "using", 770 "order", 771 "current_catalog", 772 } 773 774 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 775 776 # DuckDB doesn't generally support CREATE TABLE .. properties 777 # https://duckdb.org/docs/sql/statements/create_table.html 778 PROPERTIES_LOCATION = { 779 prop: exp.Properties.Location.UNSUPPORTED 780 for prop in generator.Generator.PROPERTIES_LOCATION 781 } 782 783 # There are a few exceptions (e.g. 

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)
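
        # Editor's note (illustrative, not part of the original module): interval_sql
        # above rewrites WEEK and QUARTER intervals as day multiples (7 and 90 days
        # respectively), e.g. (output indicative):
        #
        #   sqlglot.transpile("SELECT INTERVAL '2' WEEK", write="duckdb")
        #   # -> ["SELECT (7 * INTERVAL '2' DAY)"]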
        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")

            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)
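
        # Editor's note (illustrative, not part of the original module): as the
        # comment in generateseries_sql above says, GENERATE_SERIES is end-inclusive
        # and RANGE is end-exclusive in DuckDB, so a parsed RANGE call keeps its
        # exclusivity across a round trip (output indicative):
        #
        #   sqlglot.transpile("SELECT RANGE(0, 5)", read="duckdb", write="duckdb")
        #   # -> ["SELECT RANGE(0, 5)"]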
        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty, e.g. when transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value)
            # from Snowflake, then we generate STRUCT_PACK to build it, since
            # STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it, e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)
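

# ---------------------------------------------------------------------------
# Example usage (editor's addition, not part of the original module). The
# dialect is registered under the name "duckdb", so the top-level sqlglot API
# can target it directly; outputs are indicative of a recent sqlglot version.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import sqlglot

    # A DuckDB -> DuckDB round trip canonicalizes EDITDIST3 to LEVENSHTEIN via
    # the FUNCTIONS and TRANSFORMS tables above.
    print(sqlglot.transpile("SELECT EDITDIST3('a', 'b')", read="duckdb", write="duckdb"))
    # e.g. ["SELECT LEVENSHTEIN('a', 'b')"]

    # Snowflake's OBJECT_INSERT on an empty object becomes STRUCT_PACK through
    # objectinsert_sql above.
    print(
        sqlglot.transpile(
            "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1)", read="snowflake", write="duckdb"
        )
    )
    # e.g. ["SELECT STRUCT_PACK(k := 1)"]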
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, as in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this is interpreted as a subscript/index operator (see the sketch below).
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Specifies the strategy according to which identifiers should be normalized.
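To make the fixed-size array setting above concrete, here is a minimal sketch using sqlglot's public parse_one API (the printed expression trees are indicative and may vary across sqlglot versions):

import sqlglot

# DuckDB supports fixed-size arrays, so INT[3] stays part of the cast's type...
print(repr(sqlglot.parse_one("x::INT[3]", read="duckdb")))

# ...while Snowflake does not, so the [3] is parsed as a subscript on the cast.
print(repr(sqlglot.parse_one("x::INT[3]", read="snowflake")))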
287 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 288 if isinstance(path, exp.Literal): 289 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 290 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 291 # This check ensures we'll avoid trying to parse these as JSON paths, which can 292 # either result in a noisy warning or in an invalid representation of the path. 293 path_text = path.name 294 if path_text.startswith("/") or "[#" in path_text: 295 return path 296 297 return super().to_json_path(path)
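As a rough usage sketch of the override above (real sqlglot API; outputs indicative): a standard JSON path is parsed into JSONPath parts, while DuckDB's pointer and back-of-list syntaxes are passed through untouched.

import sqlglot

# '$.a' is a regular JSON path and goes through JSONPath parsing.
print(sqlglot.parse_one("col -> '$.a'", read="duckdb").sql("duckdb"))

# A JSON pointer ('/a/b') or back-of-list access ('[#-1]') is kept as an
# opaque string to avoid a noisy warning or a lossy path representation.
print(sqlglot.parse_one("col -> '/a/b'", read="duckdb").sql("duckdb"))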
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- PRESERVE_ORIGINAL_NAMES
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- NUMBERS_CAN_BE_UNDERSCORE_SEPARATED
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
299 class Tokenizer(tokens.Tokenizer): 300 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 301 HEREDOC_STRINGS = ["$"] 302 303 HEREDOC_TAG_IS_IDENTIFIER = True 304 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 305 306 KEYWORDS = { 307 **tokens.Tokenizer.KEYWORDS, 308 "//": TokenType.DIV, 309 "**": TokenType.DSTAR, 310 "^@": TokenType.CARET_AT, 311 "@>": TokenType.AT_GT, 312 "<@": TokenType.LT_AT, 313 "ATTACH": TokenType.ATTACH, 314 "BINARY": TokenType.VARBINARY, 315 "BITSTRING": TokenType.BIT, 316 "BPCHAR": TokenType.TEXT, 317 "CHAR": TokenType.TEXT, 318 "CHARACTER VARYING": TokenType.TEXT, 319 "DETACH": TokenType.DETACH, 320 "EXCLUDE": TokenType.EXCEPT, 321 "LOGICAL": TokenType.BOOLEAN, 322 "ONLY": TokenType.ONLY, 323 "PIVOT_WIDER": TokenType.PIVOT, 324 "POSITIONAL": TokenType.POSITIONAL, 325 "SIGNED": TokenType.INT, 326 "STRING": TokenType.TEXT, 327 "SUMMARIZE": TokenType.SUMMARIZE, 328 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 329 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 330 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 331 "TIMESTAMP_US": TokenType.TIMESTAMP, 332 "UBIGINT": TokenType.UBIGINT, 333 "UINTEGER": TokenType.UINT, 334 "USMALLINT": TokenType.USMALLINT, 335 "UTINYINT": TokenType.UTINYINT, 336 "VARCHAR": TokenType.TEXT, 337 } 338 KEYWORDS.pop("/*+") 339 340 SINGLE_TOKENS = { 341 **tokens.Tokenizer.SINGLE_TOKENS, 342 "$": TokenType.PARAMETER, 343 }
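A small sketch of the DuckDB-specific tokens above in action (round-tripping through sqlglot; the exact rendering may vary by version):

import sqlglot

# '**' (DSTAR) parses as exponentiation and '//' (DIV) as integer division.
print(sqlglot.parse_one("SELECT 2 ** 3, 7 // 2", read="duckdb").sql("duckdb"))

# LOGICAL, BITSTRING, SIGNED etc. tokenize as aliases of BOOLEAN, BIT and INT.
print(sqlglot.parse_one("SELECT CAST(x AS LOGICAL)", read="duckdb").sql("duckdb"))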
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
345 class Parser(parser.Parser): 346 BITWISE = { 347 **parser.Parser.BITWISE, 348 TokenType.TILDA: exp.RegexpLike, 349 } 350 BITWISE.pop(TokenType.CARET) 351 352 RANGE_PARSERS = { 353 **parser.Parser.RANGE_PARSERS, 354 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 355 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 356 } 357 358 EXPONENT = { 359 **parser.Parser.EXPONENT, 360 TokenType.CARET: exp.Pow, 361 TokenType.DSTAR: exp.Pow, 362 } 363 364 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 365 366 FUNCTIONS = { 367 **parser.Parser.FUNCTIONS, 368 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 369 "ARRAY_SORT": exp.SortArray.from_arg_list, 370 "DATEDIFF": _build_date_diff, 371 "DATE_DIFF": _build_date_diff, 372 "DATE_TRUNC": date_trunc_to_time, 373 "DATETRUNC": date_trunc_to_time, 374 "DECODE": lambda args: exp.Decode( 375 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 376 ), 377 "ENCODE": lambda args: exp.Encode( 378 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 379 ), 380 "EPOCH": exp.TimeToUnix.from_arg_list, 381 "EPOCH_MS": lambda args: exp.UnixToTime( 382 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 383 ), 384 "JSON": exp.ParseJSON.from_arg_list, 385 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 386 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 387 "LIST_HAS": exp.ArrayContains.from_arg_list, 388 "LIST_REVERSE_SORT": _build_sort_array_desc, 389 "LIST_SORT": exp.SortArray.from_arg_list, 390 "LIST_VALUE": lambda args: exp.Array(expressions=args), 391 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 392 "MAKE_TIMESTAMP": _build_make_timestamp, 393 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 394 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 395 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 396 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 397 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 398 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 399 this=seq_get(args, 0), 400 expression=seq_get(args, 1), 401 replacement=seq_get(args, 2), 402 modifiers=seq_get(args, 3), 403 ), 404 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 405 "STRING_SPLIT": exp.Split.from_arg_list, 406 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 407 "STRING_TO_ARRAY": exp.Split.from_arg_list, 408 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 409 "STRUCT_PACK": exp.Struct.from_arg_list, 410 "STR_SPLIT": exp.Split.from_arg_list, 411 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 412 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 413 "UNNEST": exp.Explode.from_arg_list, 414 "XOR": binary_from_function(exp.BitwiseXor), 415 "GENERATE_SERIES": _build_generate_series(), 416 "RANGE": _build_generate_series(end_exclusive=True), 417 "EDITDIST3": exp.Levenshtein.from_arg_list, 418 } 419 420 FUNCTIONS.pop("DATE_SUB") 421 FUNCTIONS.pop("GLOB") 422 423 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 424 FUNCTION_PARSERS.pop("DECODE") 425 426 NO_PAREN_FUNCTION_PARSERS = { 427 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 428 "MAP": lambda self: self._parse_map(), 429 } 430 431 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 432 TokenType.SEMI, 433 TokenType.ANTI, 434 } 435 436 PLACEHOLDER_PARSERS = { 437 **parser.Parser.PLACEHOLDER_PARSERS, 438 TokenType.PARAMETER: lambda self: ( 439 self.expression(exp.Placeholder, this=self._prev.text) 440 if self._match(TokenType.NUMBER) or 
self._match_set(self.ID_VAR_TOKENS) 441 else None 442 ), 443 } 444 445 TYPE_CONVERTERS = { 446 # https://duckdb.org/docs/sql/data_types/numeric 447 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 448 # https://duckdb.org/docs/sql/data_types/text 449 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 450 } 451 452 STATEMENT_PARSERS = { 453 **parser.Parser.STATEMENT_PARSERS, 454 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 455 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 456 } 457 458 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 459 # https://duckdb.org/docs/sql/samples.html 460 sample = super()._parse_table_sample(as_modifier=as_modifier) 461 if sample and not sample.args.get("method"): 462 if sample.args.get("size"): 463 sample.set("method", exp.var("RESERVOIR")) 464 else: 465 sample.set("method", exp.var("SYSTEM")) 466 467 return sample 468 469 def _parse_bracket( 470 self, this: t.Optional[exp.Expression] = None 471 ) -> t.Optional[exp.Expression]: 472 bracket = super()._parse_bracket(this) 473 if isinstance(bracket, exp.Bracket): 474 bracket.set("returns_list_for_maps", True) 475 476 return bracket 477 478 def _parse_map(self) -> exp.ToMap | exp.Map: 479 if self._match(TokenType.L_BRACE, advance=False): 480 return self.expression(exp.ToMap, this=self._parse_bracket()) 481 482 args = self._parse_wrapped_csv(self._parse_assignment) 483 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 484 485 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 486 return self._parse_field_def() 487 488 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 489 if len(aggregations) == 1: 490 return super()._pivot_column_names(aggregations) 491 return pivot_column_names(aggregations, dialect="duckdb") 492 493 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 494 def _parse_attach_option() -> exp.AttachOption: 495 return self.expression( 496 exp.AttachOption, 497 this=self._parse_var(any_token=True), 498 expression=self._parse_field(any_token=True), 499 ) 500 501 self._match(TokenType.DATABASE) 502 exists = self._parse_exists(not_=is_attach) 503 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 504 505 if self._match(TokenType.L_PAREN, advance=False): 506 expressions = self._parse_wrapped_csv(_parse_attach_option) 507 else: 508 expressions = None 509 510 return ( 511 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 512 if is_attach 513 else self.expression(exp.Detach, this=this, exists=exists) 514 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
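A short sketch exercising two of the DuckDB-specific parser hooks above, the no-paren MAP parser and the '$' placeholder parser (real sqlglot API; reprs indicative):

import sqlglot

# _parse_map accepts both the brace literal and the key/value list form.
print(sqlglot.parse_one("SELECT MAP {'a': 1}", read="duckdb").sql("duckdb"))
print(sqlglot.parse_one("SELECT MAP(['a'], [1])", read="duckdb").sql("duckdb"))

# '$1' tokenizes as a PARAMETER and parses into a Placeholder node.
print(repr(sqlglot.parse_one("SELECT $1", read="duckdb")))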
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
516 class Generator(generator.Generator): 517 PARAMETER_TOKEN = "$" 518 NAMED_PLACEHOLDER_TOKEN = "$" 519 JOIN_HINTS = False 520 TABLE_HINTS = False 521 QUERY_HINTS = False 522 LIMIT_FETCH = "LIMIT" 523 STRUCT_DELIMITER = ("(", ")") 524 RENAME_TABLE_WITH_DB = False 525 NVL2_SUPPORTED = False 526 SEMI_ANTI_JOIN_WITH_SIDE = False 527 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 528 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 529 LAST_DAY_SUPPORTS_DATE_PART = False 530 JSON_KEY_VALUE_PAIR_SEP = "," 531 IGNORE_NULLS_IN_FUNC = True 532 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 533 SUPPORTS_CREATE_TABLE_LIKE = False 534 MULTI_ARG_DISTINCT = False 535 CAN_IMPLEMENT_ARRAY_ANY = True 536 SUPPORTS_TO_NUMBER = False 537 COPY_HAS_INTO_KEYWORD = False 538 STAR_EXCEPT = "EXCLUDE" 539 PAD_FILL_PATTERN_IS_REQUIRED = True 540 ARRAY_CONCAT_IS_VAR_LEN = False 541 ARRAY_SIZE_DIM_REQUIRED = False 542 543 TRANSFORMS = { 544 **generator.Generator.TRANSFORMS, 545 exp.ApproxDistinct: approx_count_distinct_sql, 546 exp.Array: inline_array_unless_query, 547 exp.ArrayFilter: rename_func("LIST_FILTER"), 548 exp.ArraySort: _array_sort_sql, 549 exp.ArraySum: rename_func("LIST_SUM"), 550 exp.BitwiseXor: rename_func("XOR"), 551 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 552 exp.CurrentDate: lambda *_: "CURRENT_DATE", 553 exp.CurrentTime: lambda *_: "CURRENT_TIME", 554 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 555 exp.DayOfMonth: rename_func("DAYOFMONTH"), 556 exp.DayOfWeek: rename_func("DAYOFWEEK"), 557 exp.DayOfWeekIso: rename_func("ISODOW"), 558 exp.DayOfYear: rename_func("DAYOFYEAR"), 559 exp.DataType: _datatype_sql, 560 exp.Date: _date_sql, 561 exp.DateAdd: _date_delta_sql, 562 exp.DateFromParts: rename_func("MAKE_DATE"), 563 exp.DateSub: _date_delta_sql, 564 exp.DateDiff: _date_diff_sql, 565 exp.DateStrToDate: datestrtodate_sql, 566 exp.Datetime: no_datetime_sql, 567 exp.DatetimeSub: _date_delta_sql, 568 exp.DatetimeAdd: _date_delta_sql, 569 exp.DateToDi: lambda self, 570 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 571 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 572 exp.DiToDate: lambda self, 573 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 574 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 575 exp.GenerateDateArray: _generate_datetime_array_sql, 576 exp.GenerateTimestampArray: _generate_datetime_array_sql, 577 exp.Explode: rename_func("UNNEST"), 578 exp.IntDiv: lambda self, e: self.binary(e, "//"), 579 exp.IsInf: rename_func("ISINF"), 580 exp.IsNan: rename_func("ISNAN"), 581 exp.JSONBExists: rename_func("JSON_EXISTS"), 582 exp.JSONExtract: _arrow_json_extract_sql, 583 exp.JSONExtractArray: _json_extract_value_array_sql, 584 exp.JSONExtractScalar: _arrow_json_extract_sql, 585 exp.JSONFormat: _json_format_sql, 586 exp.JSONValueArray: _json_extract_value_array_sql, 587 exp.Lateral: explode_to_unnest_sql, 588 exp.LogicalOr: rename_func("BOOL_OR"), 589 exp.LogicalAnd: rename_func("BOOL_AND"), 590 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 591 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 592 exp.MonthsBetween: lambda self, e: self.func( 593 "DATEDIFF", 594 "'month'", 595 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 596 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 597 ), 598 exp.PercentileCont: rename_func("QUANTILE_CONT"), 599 exp.PercentileDisc: 
rename_func("QUANTILE_DISC"), 600 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 601 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 602 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 603 exp.RegexpReplace: lambda self, e: self.func( 604 "REGEXP_REPLACE", 605 e.this, 606 e.expression, 607 e.args.get("replacement"), 608 e.args.get("modifiers"), 609 ), 610 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 611 exp.RegexpILike: lambda self, e: self.func( 612 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 613 ), 614 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 615 exp.Return: lambda self, e: self.sql(e, "this"), 616 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 617 exp.Rand: rename_func("RANDOM"), 618 exp.SHA: rename_func("SHA1"), 619 exp.SHA2: sha256_sql, 620 exp.Split: rename_func("STR_SPLIT"), 621 exp.SortArray: _sort_array_sql, 622 exp.StrPosition: strposition_sql, 623 exp.StrToUnix: lambda self, e: self.func( 624 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 625 ), 626 exp.Struct: _struct_sql, 627 exp.Transform: rename_func("LIST_TRANSFORM"), 628 exp.TimeAdd: _date_delta_sql, 629 exp.Time: no_time_sql, 630 exp.TimeDiff: _timediff_sql, 631 exp.Timestamp: no_timestamp_sql, 632 exp.TimestampDiff: lambda self, e: self.func( 633 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 634 ), 635 exp.TimestampTrunc: timestamptrunc_sql(), 636 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 637 exp.TimeStrToTime: timestrtotime_sql, 638 exp.TimeStrToUnix: lambda self, e: self.func( 639 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 640 ), 641 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 642 exp.TimeToUnix: rename_func("EPOCH"), 643 exp.TsOrDiToDi: lambda self, 644 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 645 exp.TsOrDsAdd: _date_delta_sql, 646 exp.TsOrDsDiff: lambda self, e: self.func( 647 "DATE_DIFF", 648 f"'{e.args.get('unit') or 'DAY'}'", 649 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 650 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 651 ), 652 exp.UnixToStr: lambda self, e: self.func( 653 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 654 ), 655 exp.DatetimeTrunc: lambda self, e: self.func( 656 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 657 ), 658 exp.UnixToTime: _unix_to_time_sql, 659 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 660 exp.VariancePop: rename_func("VAR_POP"), 661 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 662 exp.Xor: bool_xor_sql, 663 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 664 rename_func("LEVENSHTEIN") 665 ), 666 } 667 668 SUPPORTED_JSON_PATH_PARTS = { 669 exp.JSONPathKey, 670 exp.JSONPathRoot, 671 exp.JSONPathSubscript, 672 exp.JSONPathWildcard, 673 } 674 675 TYPE_MAPPING = { 676 **generator.Generator.TYPE_MAPPING, 677 exp.DataType.Type.BINARY: "BLOB", 678 exp.DataType.Type.BPCHAR: "TEXT", 679 exp.DataType.Type.CHAR: "TEXT", 680 exp.DataType.Type.DATETIME: "TIMESTAMP", 681 exp.DataType.Type.FLOAT: "REAL", 682 exp.DataType.Type.NCHAR: "TEXT", 683 exp.DataType.Type.NVARCHAR: "TEXT", 684 exp.DataType.Type.UINT: "UINTEGER", 685 exp.DataType.Type.VARBINARY: "BLOB", 686 exp.DataType.Type.ROWVERSION: 
"BLOB", 687 exp.DataType.Type.VARCHAR: "TEXT", 688 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 689 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 690 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 691 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 692 } 693 694 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 695 RESERVED_KEYWORDS = { 696 "array", 697 "analyse", 698 "union", 699 "all", 700 "when", 701 "in_p", 702 "default", 703 "create_p", 704 "window", 705 "asymmetric", 706 "to", 707 "else", 708 "localtime", 709 "from", 710 "end_p", 711 "select", 712 "current_date", 713 "foreign", 714 "with", 715 "grant", 716 "session_user", 717 "or", 718 "except", 719 "references", 720 "fetch", 721 "limit", 722 "group_p", 723 "leading", 724 "into", 725 "collate", 726 "offset", 727 "do", 728 "then", 729 "localtimestamp", 730 "check_p", 731 "lateral_p", 732 "current_role", 733 "where", 734 "asc_p", 735 "placing", 736 "desc_p", 737 "user", 738 "unique", 739 "initially", 740 "column", 741 "both", 742 "some", 743 "as", 744 "any", 745 "only", 746 "deferrable", 747 "null_p", 748 "current_time", 749 "true_p", 750 "table", 751 "case", 752 "trailing", 753 "variadic", 754 "for", 755 "on", 756 "distinct", 757 "false_p", 758 "not", 759 "constraint", 760 "current_timestamp", 761 "returning", 762 "primary", 763 "intersect", 764 "having", 765 "analyze", 766 "current_user", 767 "and", 768 "cast", 769 "symmetric", 770 "using", 771 "order", 772 "current_catalog", 773 } 774 775 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 776 777 # DuckDB doesn't generally support CREATE TABLE .. properties 778 # https://duckdb.org/docs/sql/statements/create_table.html 779 PROPERTIES_LOCATION = { 780 prop: exp.Properties.Location.UNSUPPORTED 781 for prop in generator.Generator.PROPERTIES_LOCATION 782 } 783 784 # There are a few exceptions (e.g. 
temporary tables) which are supported or 785 # can be transpiled to DuckDB, so we explicitly override them accordingly 786 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 787 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 788 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 789 790 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 791 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 792 793 def strtotime_sql(self, expression: exp.StrToTime) -> str: 794 if expression.args.get("safe"): 795 formatted_time = self.format_time(expression) 796 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 797 return str_to_time_sql(self, expression) 798 799 def strtodate_sql(self, expression: exp.StrToDate) -> str: 800 if expression.args.get("safe"): 801 formatted_time = self.format_time(expression) 802 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 803 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 804 805 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 806 arg = expression.this 807 if expression.args.get("safe"): 808 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 809 return self.func("JSON", arg) 810 811 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 812 nano = expression.args.get("nano") 813 if nano is not None: 814 expression.set( 815 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 816 ) 817 818 return rename_func("MAKE_TIME")(self, expression) 819 820 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 821 sec = expression.args["sec"] 822 823 milli = expression.args.get("milli") 824 if milli is not None: 825 sec += milli.pop() / exp.Literal.number(1000.0) 826 827 nano = expression.args.get("nano") 828 if nano is not None: 829 sec += nano.pop() / exp.Literal.number(1000000000.0) 830 831 if milli or nano: 832 expression.set("sec", sec) 833 834 return rename_func("MAKE_TIMESTAMP")(self, expression) 835 836 def tablesample_sql( 837 self, 838 expression: exp.TableSample, 839 tablesample_keyword: t.Optional[str] = None, 840 ) -> str: 841 if not isinstance(expression.parent, exp.Select): 842 # This sample clause only applies to a single source, not the entire resulting relation 843 tablesample_keyword = "TABLESAMPLE" 844 845 if expression.args.get("size"): 846 method = expression.args.get("method") 847 if method and method.name.upper() != "RESERVOIR": 848 self.unsupported( 849 f"Sampling method {method} is not supported with a discrete sample count, " 850 "defaulting to reservoir sampling" 851 ) 852 expression.set("method", exp.var("RESERVOIR")) 853 854 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 855 856 def interval_sql(self, expression: exp.Interval) -> str: 857 multiplier: t.Optional[int] = None 858 unit = expression.text("unit").lower() 859 860 if unit.startswith("week"): 861 multiplier = 7 862 if unit.startswith("quarter"): 863 multiplier = 90 864 865 if multiplier: 866 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 867 868 return super().interval_sql(expression) 869 870 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 871 if isinstance(expression.parent, exp.UserDefinedFunction): 872 return self.sql(expression, "this") 873 
return super().columndef_sql(expression, sep) 874 875 def join_sql(self, expression: exp.Join) -> str: 876 if ( 877 expression.side == "LEFT" 878 and not expression.args.get("on") 879 and isinstance(expression.this, exp.Unnest) 880 ): 881 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 882 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 883 return super().join_sql(expression.on(exp.true())) 884 885 return super().join_sql(expression) 886 887 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 888 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 889 if expression.args.get("is_end_exclusive"): 890 return rename_func("RANGE")(self, expression) 891 892 return self.function_fallback_sql(expression) 893 894 def bracket_sql(self, expression: exp.Bracket) -> str: 895 this = expression.this 896 if isinstance(this, exp.Array): 897 this.replace(exp.paren(this)) 898 899 bracket = super().bracket_sql(expression) 900 901 if not expression.args.get("returns_list_for_maps"): 902 if not this.type: 903 from sqlglot.optimizer.annotate_types import annotate_types 904 905 this = annotate_types(this) 906 907 if this.is_type(exp.DataType.Type.MAP): 908 bracket = f"({bracket})[1]" 909 910 return bracket 911 912 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 913 expression_sql = self.sql(expression, "expression") 914 915 func = expression.this 916 if isinstance(func, exp.PERCENTILES): 917 # Make the order key the first arg and slide the fraction to the right 918 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 919 order_col = expression.find(exp.Ordered) 920 if order_col: 921 func.set("expression", func.this) 922 func.set("this", order_col.this) 923 924 this = self.sql(expression, "this").rstrip(")") 925 926 return f"{this}{expression_sql})" 927 928 def length_sql(self, expression: exp.Length) -> str: 929 arg = expression.this 930 931 # Dialects like BQ and Snowflake also accept binary values as args, so 932 # DDB will attempt to infer the type or resort to case/when resolution 933 if not expression.args.get("binary") or arg.is_string: 934 return self.func("LENGTH", arg) 935 936 if not arg.type: 937 from sqlglot.optimizer.annotate_types import annotate_types 938 939 arg = annotate_types(arg) 940 941 if arg.is_type(*exp.DataType.TEXT_TYPES): 942 return self.func("LENGTH", arg) 943 944 # We need these casts to make duckdb's static type checker happy 945 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 946 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 947 948 case = ( 949 exp.case(self.func("TYPEOF", arg)) 950 .when( 951 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 952 ) # anonymous to break length_sql recursion 953 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 954 ) 955 956 return self.sql(case) 957 958 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 959 this = expression.this 960 key = expression.args.get("key") 961 key_sql = key.name if isinstance(key, exp.Expression) else "" 962 value_sql = self.sql(expression, "value") 963 964 kv_sql = f"{key_sql} := {value_sql}" 965 966 # If the input struct is empty e.g. 
transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 967 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 968 if isinstance(this, exp.Struct) and not this.expressions: 969 return self.func("STRUCT_PACK", kv_sql) 970 971 return self.func("STRUCT_INSERT", this, kv_sql) 972 973 def unnest_sql(self, expression: exp.Unnest) -> str: 974 explode_array = expression.args.get("explode_array") 975 if explode_array: 976 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 977 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 978 expression.expressions.append( 979 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 980 ) 981 982 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 983 alias = expression.args.get("alias") 984 if alias: 985 expression.set("alias", None) 986 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 987 988 unnest_sql = super().unnest_sql(expression) 989 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 990 return self.sql(select) 991 992 return super().unnest_sql(expression) 993 994 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 995 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 996 # DuckDB should render IGNORE NULLS only for the general-purpose 997 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 998 return super().ignorenulls_sql(expression) 999 1000 return self.sql(expression, "this") 1001 1002 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1003 this = self.sql(expression, "this") 1004 null_text = self.sql(expression, "null") 1005 1006 if null_text: 1007 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1008 1009 return self.func("ARRAY_TO_STRING", this, expression.expression) 1010 1011 @unsupported_args("position", "occurrence") 1012 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1013 group = expression.args.get("group") 1014 params = expression.args.get("parameters") 1015 1016 # Do not render group if there is no following argument, 1017 # and it's the default value for this dialect 1018 if ( 1019 not params 1020 and group 1021 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1022 ): 1023 group = None 1024 return self.func( 1025 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1026 ) 1027 1028 @unsupported_args("culture") 1029 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1030 fmt = expression.args.get("format") 1031 if fmt and fmt.is_int: 1032 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1033 1034 self.unsupported("Only integer formats are supported by NumberToStr") 1035 return self.function_fallback_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
799 def strtodate_sql(self, expression: exp.StrToDate) -> str: 800 if expression.args.get("safe"): 801 formatted_time = self.format_time(expression) 802 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 803 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
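A sketch of the safe branch above, constructing the expression directly (readers such as Snowflake's TRY_TO_DATE normally set the safe arg; the literals here are illustrative):

from sqlglot import exp
from sqlglot.dialects.duckdb import DuckDB

node = exp.StrToDate(
    this=exp.Literal.string("2023-01-15"),
    format=exp.Literal.string("%Y-%m-%d"),
    safe=exp.true(),
)
# safe routes through TRY_STRPTIME; without it, plain STRPTIME is generated.
print(DuckDB().generate(node))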
811 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 812 nano = expression.args.get("nano") 813 if nano is not None: 814 expression.set( 815 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 816 ) 817 818 return rename_func("MAKE_TIME")(self, expression)
820 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 821 sec = expression.args["sec"] 822 823 milli = expression.args.get("milli") 824 if milli is not None: 825 sec += milli.pop() / exp.Literal.number(1000.0) 826 827 nano = expression.args.get("nano") 828 if nano is not None: 829 sec += nano.pop() / exp.Literal.number(1000000000.0) 830 831 if milli or nano: 832 expression.set("sec", sec) 833 834 return rename_func("MAKE_TIMESTAMP")(self, expression)
836 def tablesample_sql( 837 self, 838 expression: exp.TableSample, 839 tablesample_keyword: t.Optional[str] = None, 840 ) -> str: 841 if not isinstance(expression.parent, exp.Select): 842 # This sample clause only applies to a single source, not the entire resulting relation 843 tablesample_keyword = "TABLESAMPLE" 844 845 if expression.args.get("size"): 846 method = expression.args.get("method") 847 if method and method.name.upper() != "RESERVOIR": 848 self.unsupported( 849 f"Sampling method {method} is not supported with a discrete sample count, " 850 "defaulting to reservoir sampling" 851 ) 852 expression.set("method", exp.var("RESERVOIR")) 853 854 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
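For example (a sketch; exact output may differ by version), a discrete row count combined with a non-reservoir method triggers the unsupported warning and falls back to reservoir sampling:

import sqlglot

# SYSTEM sampling with a row count is rewritten to RESERVOIR for DuckDB.
print(sqlglot.transpile("SELECT * FROM t TABLESAMPLE SYSTEM (10 ROWS)", write="duckdb")[0])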
856 def interval_sql(self, expression: exp.Interval) -> str: 857 multiplier: t.Optional[int] = None 858 unit = expression.text("unit").lower() 859 860 if unit.startswith("week"): 861 multiplier = 7 862 if unit.startswith("quarter"): 863 multiplier = 90 864 865 if multiplier: 866 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 867 868 return super().interval_sql(expression)
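Since this dialect handles WEEK and QUARTER intervals by lowering them to days, a transpilation sketch looks like:

import sqlglot

# WEEK becomes a day interval scaled by 7 (QUARTER would use 90),
# e.g. SELECT CURRENT_DATE + (7 * INTERVAL '2' DAY)
print(sqlglot.transpile("SELECT CURRENT_DATE + INTERVAL '2' WEEK", write="duckdb")[0])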
875 def join_sql(self, expression: exp.Join) -> str: 876 if ( 877 expression.side == "LEFT" 878 and not expression.args.get("on") 879 and isinstance(expression.this, exp.Unnest) 880 ): 881 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 882 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 883 return super().join_sql(expression.on(exp.true())) 884 885 return super().join_sql(expression)
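For instance, transpiling a BigQuery-style correlated LEFT JOIN UNNEST, which omits the ON clause (a sketch):

import sqlglot

sql = "SELECT * FROM t LEFT JOIN UNNEST(t.arr) AS a"
# DuckDB requires an explicit ON clause here, so a dummy ON TRUE is injected.
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])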
894 def bracket_sql(self, expression: exp.Bracket) -> str: 895 this = expression.this 896 if isinstance(this, exp.Array): 897 this.replace(exp.paren(this)) 898 899 bracket = super().bracket_sql(expression) 900 901 if not expression.args.get("returns_list_for_maps"): 902 if not this.type: 903 from sqlglot.optimizer.annotate_types import annotate_types 904 905 this = annotate_types(this) 906 907 if this.is_type(exp.DataType.Type.MAP): 908 bracket = f"({bracket})[1]" 909 910 return bracket
912 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 913 expression_sql = self.sql(expression, "expression") 914 915 func = expression.this 916 if isinstance(func, exp.PERCENTILES): 917 # Make the order key the first arg and slide the fraction to the right 918 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 919 order_col = expression.find(exp.Ordered) 920 if order_col: 921 func.set("expression", func.this) 922 func.set("this", order_col.this) 923 924 this = self.sql(expression, "this").rstrip(")") 925 926 return f"{this}{expression_sql})"
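Concretely (a sketch): the fraction slides to the right of the order key, so PERCENTILE_CONT becomes DuckDB's QUANTILE_CONT form.

import sqlglot

sql = "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) FROM t"
# e.g. SELECT QUANTILE_CONT(x, 0.5) FROM t
print(sqlglot.transpile(sql, read="postgres", write="duckdb")[0])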
928 def length_sql(self, expression: exp.Length) -> str: 929 arg = expression.this 930 931 # Dialects like BQ and Snowflake also accept binary values as args, so 932 # DDB will attempt to infer the type or resort to case/when resolution 933 if not expression.args.get("binary") or arg.is_string: 934 return self.func("LENGTH", arg) 935 936 if not arg.type: 937 from sqlglot.optimizer.annotate_types import annotate_types 938 939 arg = annotate_types(arg) 940 941 if arg.is_type(*exp.DataType.TEXT_TYPES): 942 return self.func("LENGTH", arg) 943 944 # We need these casts to make duckdb's static type checker happy 945 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 946 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 947 948 case = ( 949 exp.case(self.func("TYPEOF", arg)) 950 .when( 951 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 952 ) # anonymous to break length_sql recursion 953 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 954 ) 955 956 return self.sql(case)
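A sketch of the fallback path, assuming the reading dialect (e.g. BigQuery) marks LENGTH as binary-accepting: an argument of unknown type ends up in the TYPEOF dispatch.

import sqlglot

# col's type is unknown, so the DuckDB output dispatches on TYPEOF(col)
# between LENGTH (VARCHAR) and OCTET_LENGTH (BLOB).
print(sqlglot.transpile("SELECT LENGTH(col) FROM t", read="bigquery", write="duckdb")[0])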
958 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 959 this = expression.this 960 key = expression.args.get("key") 961 key_sql = key.name if isinstance(key, exp.Expression) else "" 962 value_sql = self.sql(expression, "value") 963 964 kv_sql = f"{key_sql} := {value_sql}" 965 966 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 967 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 968 if isinstance(this, exp.Struct) and not this.expressions: 969 return self.func("STRUCT_PACK", kv_sql) 970 971 return self.func("STRUCT_INSERT", this, kv_sql)
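For example, the Snowflake case mentioned in the comment (a sketch):

import sqlglot

sql = "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1)"
# An empty input struct yields STRUCT_PACK(k := 1); a non-empty one
# would yield STRUCT_INSERT(..., k := 1).
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])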
973 def unnest_sql(self, expression: exp.Unnest) -> str: 974 explode_array = expression.args.get("explode_array") 975 if explode_array: 976 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 977 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 978 expression.expressions.append( 979 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 980 ) 981 982 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 983 alias = expression.args.get("alias") 984 if alias: 985 expression.set("alias", None) 986 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 987 988 unnest_sql = super().unnest_sql(expression) 989 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 990 return self.sql(select) 991 992 return super().unnest_sql(expression)
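A sketch of the BigQuery nested-array case described in the comments, assuming the reading dialect marks the UNNEST for explosion:

import sqlglot

sql = "SELECT * FROM UNNEST([[1, 2], [3]]) AS v"
# Expected shape: FROM (SELECT UNNEST(..., max_depth => 2)) with the column
# alias v promoted to a table alias.
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])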
994 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 995 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 996 # DuckDB should render IGNORE NULLS only for the general-purpose 997 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 998 return super().ignorenulls_sql(expression) 999 1000 return self.sql(expression, "this")
1002 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1003 this = self.sql(expression, "this") 1004 null_text = self.sql(expression, "null") 1005 1006 if null_text: 1007 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1008 1009 return self.func("ARRAY_TO_STRING", this, expression.expression)
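For instance (a sketch), a NULL-replacement argument is handled by pre-mapping the list:

import sqlglot

sql = "SELECT ARRAY_TO_STRING(arr, ',', 'N/A')"
# e.g. ARRAY_TO_STRING(LIST_TRANSFORM(arr, x -> COALESCE(x, 'N/A')), ',')
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])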
1011 @unsupported_args("position", "occurrence") 1012 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1013 group = expression.args.get("group") 1014 params = expression.args.get("parameters") 1015 1016 # Do not render group if there is no following argument, 1017 # and it's the default value for this dialect 1018 if ( 1019 not params 1020 and group 1021 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1022 ): 1023 group = None 1024 return self.func( 1025 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1026 )
1028 @unsupported_args("culture") 1029 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1030 fmt = expression.args.get("format") 1031 if fmt and fmt.is_int: 1032 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1033 1034 self.unsupported("Only integer formats are supported by NumberToStr") 1035 return self.function_fallback_sql(expression)
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql