sqlglot.dialects.duckdb
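This page documents sqlglot's DuckDB dialect. As a minimal, hedged sketch of how the dialect is typically exercised through sqlglot's top-level API (identifiers are illustrative; exact output strings can vary across sqlglot versions):

import sqlglot

# EPOCH_MS is parsed into the dialect-agnostic exp.UnixToTime node (see the
# FUNCTIONS table in the source below), then re-rendered by the target dialect.
print(sqlglot.transpile("SELECT EPOCH_MS(1618088028295)", read="duckdb", write="snowflake")[0])

# Parse DuckDB SQL into an AST and generate DuckDB SQL back out of it.
ast = sqlglot.parse_one("SELECT STRUCT_PACK(a := 1, b := 'x')", read="duckdb")
print(ast.sql(dialect="duckdb"))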
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    regexp_extract_sql,
    rename_func,
    str_position_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
)
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]

WINDOW_FUNCS_WITH_IGNORE_NULLS = (
    exp.FirstValue,
    exp.LastValue,
    exp.Lag,
    exp.Lead,
    exp.NthValue,
)


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@generator.unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if a cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_struct_cast = ancestor_cast and any(
        casted_type.is_type(exp.DataType.Type.STRUCT)
        for casted_type in ancestor_cast.find_all(exp.DataType)
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_struct_cast:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_struct_cast else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.COMMAND,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract,
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
            "GENERATE_SERIES": _build_generate_series(),
            "RANGE": _build_generate_series(end_exclusive=True),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)
            if isinstance(bracket, exp.Bracket):
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
            exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty, e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake,
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)
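A quick sketch of the series helpers above in action (outputs are indicative and may vary across sqlglot versions): RANGE is parsed with is_end_exclusive=True, so it is regenerated as RANGE rather than GENERATE_SERIES, and a one-argument series gains DuckDB's implicit 0 start.

import sqlglot

# RANGE(a, b) is end-exclusive; the flag set by _build_generate_series makes
# generateseries_sql render it back as RANGE instead of GENERATE_SERIES.
print(sqlglot.transpile("SELECT RANGE(0, 5)", read="duckdb", write="duckdb")[0])

# A single-argument series picks up the implicit 0 start added at parse time.
print(sqlglot.transpile("SELECT GENERATE_SERIES(5)", read="duckdb", write="duckdb")[0])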
class DuckDB(Dialect):
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
Whether expressions such as x::INT[5] should be parsed as fixed-size array definitions/casts, e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator (see the sketch after this list).
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Specifies the strategy according to which identifiers should be normalized.
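To make the fixed-size array flag above concrete, here is a minimal sketch using the public sqlglot.transpile helper; the exact output string is indicative rather than guaranteed across versions:

import sqlglot

# DuckDB parses INT[5] as a fixed-size array type, so the cast round-trips;
# a dialect without fixed-size arrays would treat [5] as a subscript instead.
print(sqlglot.transpile("SELECT x::INT[5] FROM t", read="duckdb", write="duckdb")[0])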
def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    if isinstance(path, exp.Literal):
        # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
        # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
        # This check ensures we'll avoid trying to parse these as JSON paths, which can
        # either result in a noisy warning or in an invalid representation of the path.
        path_text = path.name
        if path_text.startswith("/") or "[#" in path_text:
            return path

    return super().to_json_path(path)
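A small sketch of this check in action, assuming the arrow operator parses to a JSON extract as in the base dialect (outputs indicative):

import sqlglot

# '$.a' is parsed into a structured JSON path; the JSON pointer form '/a/0'
# is passed through as-is, avoiding a noisy warning or a lossy conversion.
for path in ("'$.a'", "'/a/0'"):
    print(sqlglot.transpile(f"SELECT j -> {path} FROM t", read="duckdb", write="duckdb")[0])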
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    HEREDOC_STRINGS = ["$"]

    HEREDOC_TAG_IS_IDENTIFIER = True
    HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "//": TokenType.DIV,
        "**": TokenType.DSTAR,
        "^@": TokenType.CARET_AT,
        "@>": TokenType.AT_GT,
        "<@": TokenType.LT_AT,
        "ATTACH": TokenType.COMMAND,
        "BINARY": TokenType.VARBINARY,
        "BITSTRING": TokenType.BIT,
        "BPCHAR": TokenType.TEXT,
        "CHAR": TokenType.TEXT,
        "CHARACTER VARYING": TokenType.TEXT,
        "EXCLUDE": TokenType.EXCEPT,
        "LOGICAL": TokenType.BOOLEAN,
        "ONLY": TokenType.ONLY,
        "PIVOT_WIDER": TokenType.PIVOT,
        "POSITIONAL": TokenType.POSITIONAL,
        "SIGNED": TokenType.INT,
        "STRING": TokenType.TEXT,
        "SUMMARIZE": TokenType.SUMMARIZE,
        "TIMESTAMP_S": TokenType.TIMESTAMP_S,
        "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
        "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
        "TIMESTAMP_US": TokenType.TIMESTAMP,
        "UBIGINT": TokenType.UBIGINT,
        "UINTEGER": TokenType.UINT,
        "USMALLINT": TokenType.USMALLINT,
        "UTINYINT": TokenType.UTINYINT,
        "VARCHAR": TokenType.TEXT,
    }
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }
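The keyword remappings above mean DuckDB-specific operators and type aliases land on sqlglot's canonical tokens; a minimal sketch (output indicative):

import sqlglot

# '**' lexes to DSTAR (power), '//' to DIV (integer division) and BITSTRING
# to the BIT type, so all three parse cleanly under the DuckDB dialect.
print(sqlglot.transpile("SELECT 2 ** 3, 7 // 2, CAST(b AS BITSTRING) FROM t", read="duckdb", write="duckdb")[0])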
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    BITWISE = {
        **parser.Parser.BITWISE,
        TokenType.TILDA: exp.RegexpLike,
    }
    BITWISE.pop(TokenType.CARET)

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
        TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
    }

    EXPONENT = {
        **parser.Parser.EXPONENT,
        TokenType.CARET: exp.Pow,
        TokenType.DSTAR: exp.Pow,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARRAY_REVERSE_SORT": _build_sort_array_desc,
        "ARRAY_SORT": exp.SortArray.from_arg_list,
        "DATEDIFF": _build_date_diff,
        "DATE_DIFF": _build_date_diff,
        "DATE_TRUNC": date_trunc_to_time,
        "DATETRUNC": date_trunc_to_time,
        "DECODE": lambda args: exp.Decode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "ENCODE": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "EPOCH": exp.TimeToUnix.from_arg_list,
        "EPOCH_MS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "JSON": exp.ParseJSON.from_arg_list,
        "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
        "LIST_HAS": exp.ArrayContains.from_arg_list,
        "LIST_REVERSE_SORT": _build_sort_array_desc,
        "LIST_SORT": exp.SortArray.from_arg_list,
        "LIST_VALUE": lambda args: exp.Array(expressions=args),
        "MAKE_TIME": exp.TimeFromParts.from_arg_list,
        "MAKE_TIMESTAMP": _build_make_timestamp,
        "MEDIAN": lambda args: exp.PercentileCont(
            this=seq_get(args, 0), expression=exp.Literal.number(0.5)
        ),
        "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
        "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
        "REGEXP_EXTRACT": build_regexp_extract,
        "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2),
            modifiers=seq_get(args, 3),
        ),
        "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
        "STRING_SPLIT": exp.Split.from_arg_list,
        "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "STRING_TO_ARRAY": exp.Split.from_arg_list,
        "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
        "STRUCT_PACK": exp.Struct.from_arg_list,
        "STR_SPLIT": exp.Split.from_arg_list,
        "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
        "UNNEST": exp.Explode.from_arg_list,
        "XOR": binary_from_function(exp.BitwiseXor),
        "GENERATE_SERIES": _build_generate_series(),
        "RANGE": _build_generate_series(end_exclusive=True),
    }

    FUNCTIONS.pop("DATE_SUB")
    FUNCTIONS.pop("GLOB")

    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    FUNCTION_PARSERS.pop("DECODE")

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "MAP": lambda self: self._parse_map(),
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.SEMI,
        TokenType.ANTI,
    }

    PLACEHOLDER_PARSERS = {
        **parser.Parser.PLACEHOLDER_PARSERS,
        TokenType.PARAMETER: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    TYPE_CONVERTERS = {
        # https://duckdb.org/docs/sql/data_types/numeric
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
        # https://duckdb.org/docs/sql/data_types/text
        exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
    }

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        # https://duckdb.org/docs/sql/samples.html
        sample = super()._parse_table_sample(as_modifier=as_modifier)
        if sample and not sample.args.get("method"):
            if sample.args.get("size"):
                sample.set("method", exp.var("RESERVOIR"))
            else:
                sample.set("method", exp.var("SYSTEM"))

        return sample

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)
        if isinstance(bracket, exp.Bracket):
            bracket.set("returns_list_for_maps", True)

        return bracket

    def _parse_map(self) -> exp.ToMap | exp.Map:
        if self._match(TokenType.L_BRACE, advance=False):
            return self.expression(exp.ToMap, this=self._parse_bracket())

        args = self._parse_wrapped_csv(self._parse_assignment)
        return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_field_def()

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        if len(aggregations) == 1:
            return super()._pivot_column_names(aggregations)
        return pivot_column_names(aggregations, dialect="duckdb")
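A short sketch of the FUNCTIONS table in action: MEDIAN is normalized to the dialect-agnostic PercentileCont node at parse time, which the DuckDB generator later renders as QUANTILE_CONT (output indicative):

from sqlglot import parse_one

ast = parse_one("SELECT MEDIAN(x) FROM t", read="duckdb")
print(ast.sql("duckdb"))  # e.g. SELECT QUANTILE_CONT(x, 0.5) FROM t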
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
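A minimal sketch of wiring these arguments up through the Dialect helpers listed earlier; assumes the standard sqlglot.errors module:

from sqlglot import Dialect
from sqlglot.errors import ErrorLevel

duckdb = Dialect.get_or_raise("duckdb")

# Raise a ParseError immediately rather than collecting up to max_errors messages
parser = duckdb.parser(error_level=ErrorLevel.RAISE)
tokens = duckdb.tokenize("SELECT 1")
print(parser.parse(tokens)[0].sql("duckdb"))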
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    PARAMETER_TOKEN = "$"
    NAMED_PLACEHOLDER_TOKEN = "$"
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    LIMIT_FETCH = "LIMIT"
    STRUCT_DELIMITER = ("(", ")")
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    SEMI_ANTI_JOIN_WITH_SIDE = False
    TABLESAMPLE_KEYWORDS = "USING SAMPLE"
    TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_BRACKETED_KEY_SUPPORTED = False
    SUPPORTS_CREATE_TABLE_LIKE = False
    MULTI_ARG_DISTINCT = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    COPY_HAS_INTO_KEYWORD = False
    STAR_EXCEPT = "EXCLUDE"
    PAD_FILL_PATTERN_IS_REQUIRED = True
    ARRAY_CONCAT_IS_VAR_LEN = False

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.Array: inline_array_unless_query,
        exp.ArrayFilter: rename_func("LIST_FILTER"),
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
        exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
        exp.ArraySort: _array_sort_sql,
        exp.ArraySum: rename_func("LIST_SUM"),
        exp.BitwiseXor: rename_func("XOR"),
        exp.CommentColumnConstraint: no_comment_column_constraint_sql,
        exp.CurrentDate: lambda *_: "CURRENT_DATE",
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfWeekIso: rename_func("ISODOW"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.DataType: _datatype_sql,
        exp.Date: _date_sql,
        exp.DateAdd: _date_delta_sql,
        exp.DateFromParts: rename_func("MAKE_DATE"),
        exp.DateSub: _date_delta_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.Datetime: no_datetime_sql,
        exp.DatetimeSub: _date_delta_sql,
        exp.DatetimeAdd: _date_delta_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
        exp.DiToDate: lambda self,
        e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
        exp.GenerateDateArray: _generate_datetime_array_sql,
        exp.GenerateTimestampArray: _generate_datetime_array_sql,
        exp.Explode: rename_func("UNNEST"),
        exp.IntDiv: lambda self, e: self.binary(e, "//"),
        exp.IsInf: rename_func("ISINF"),
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: _arrow_json_extract_sql,
        exp.JSONExtractScalar: _arrow_json_extract_sql,
        exp.JSONFormat: _json_format_sql,
        exp.Lateral: explode_to_unnest_sql,
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.MonthsBetween: lambda self, e: self.func(
            "DATEDIFF",
            "'month'",
            exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
            exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
        ),
        exp.PercentileCont: rename_func("QUANTILE_CONT"),
        exp.PercentileDisc: rename_func("QUANTILE_DISC"),
        # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
        # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
        exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: lambda self, e: self.func(
            "REGEXP_REPLACE",
            e.this,
            e.expression,
            e.args.get("replacement"),
            e.args.get("modifiers"),
        ),
        exp.RegexpLike: rename_func("REGEXP_MATCHES"),
        exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
        exp.Return: lambda self, e: self.sql(e, "this"),
        exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
        exp.Rand: rename_func("RANDOM"),
        exp.SafeDivide: no_safe_divide_sql,
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.Split: rename_func("STR_SPLIT"),
        exp.SortArray: _sort_array_sql,
        exp.StrPosition: str_position_sql,
        exp.StrToUnix: lambda self, e: self.func(
            "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
        ),
        exp.Struct: _struct_sql,
        exp.Transform: rename_func("LIST_TRANSFORM"),
        exp.TimeAdd: _date_delta_sql,
        exp.Time: no_time_sql,
        exp.TimeDiff: _timediff_sql,
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampDiff: lambda self, e: self.func(
            "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
        ),
        exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("EPOCH"),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _date_delta_sql,
        exp.TsOrDsDiff: lambda self, e: self.func(
            "DATE_DIFF",
            f"'{e.args.get('unit') or 'DAY'}'",
            exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
            exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
        ),
        exp.UnixToStr: lambda self, e: self.func(
            "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
        ),
        exp.DatetimeTrunc: lambda self, e: self.func(
            "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
        ),
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: bool_xor_sql,
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BINARY: "BLOB",
        exp.DataType.Type.BPCHAR: "TEXT",
        exp.DataType.Type.CHAR: "TEXT",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.NCHAR: "TEXT",
        exp.DataType.Type.NVARCHAR: "TEXT",
        exp.DataType.Type.UINT: "UINTEGER",
        exp.DataType.Type.VARBINARY: "BLOB",
        exp.DataType.Type.ROWVERSION: "BLOB",
        exp.DataType.Type.VARCHAR: "TEXT",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
        exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
        exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
    }

    # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
    RESERVED_KEYWORDS = {
        "array", "analyse", "union", "all", "when", "in_p", "default", "create_p",
        "window", "asymmetric", "to", "else", "localtime", "from", "end_p", "select",
        "current_date", "foreign", "with", "grant", "session_user", "or", "except",
        "references", "fetch", "limit", "group_p", "leading", "into", "collate",
        "offset", "do", "then", "localtimestamp", "check_p", "lateral_p",
        "current_role", "where", "asc_p", "placing", "desc_p", "user", "unique",
        "initially", "column", "both", "some", "as", "any", "only", "deferrable",
        "null_p", "current_time", "true_p", "table", "case", "trailing", "variadic",
        "for", "on", "distinct", "false_p", "not", "constraint", "current_timestamp",
        "returning", "primary", "intersect", "having", "analyze", "current_user",
        "and", "cast", "symmetric", "using", "order", "current_catalog",
    }

    UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

    # DuckDB doesn't generally support CREATE TABLE .. properties
    # https://duckdb.org/docs/sql/statements/create_table.html
    PROPERTIES_LOCATION = {
        prop: exp.Properties.Location.UNSUPPORTED
        for prop in generator.Generator.PROPERTIES_LOCATION
    }

    # There are a few exceptions (e.g. temporary tables) which are supported or
    # can be transpiled to DuckDB, so we explicitly override them accordingly
    PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
    PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
    PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

    def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
        return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

    def strtotime_sql(self, expression: exp.StrToTime) -> str:
        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
        return str_to_time_sql(self, expression)

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
        return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

    def parsejson_sql(self, expression: exp.ParseJSON) -> str:
        arg = expression.this
        if expression.args.get("safe"):
            return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
        return self.func("JSON", arg)

    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        nano = expression.args.get("nano")
        if nano is not None:
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        sec = expression.args["sec"]

        milli = expression.args.get("milli")
        if milli is not None:
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)

    def tablesample_sql(
        self,
        expression: exp.TableSample,
        tablesample_keyword: t.Optional[str] = None,
    ) -> str:
        if not isinstance(expression.parent, exp.Select):
            # This sample clause only applies to a single source, not the entire resulting relation
            tablesample_keyword = "TABLESAMPLE"

        if expression.args.get("size"):
            method = expression.args.get("method")
            if method and method.name.upper() != "RESERVOIR":
                self.unsupported(
                    f"Sampling method {method} is not supported with a discrete sample count, "
                    "defaulting to reservoir sampling"
                )
                expression.set("method", exp.var("RESERVOIR"))

        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

    def interval_sql(self, expression: exp.Interval) -> str:
        multiplier: t.Optional[int] = None
        unit = expression.text("unit").lower()

        if unit.startswith("week"):
            multiplier = 7
        if unit.startswith("quarter"):
            multiplier = 90

        if multiplier:
            return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

        return super().interval_sql(expression)

    def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
        if isinstance(expression.parent, exp.UserDefinedFunction):
            return self.sql(expression, "this")
        return super().columndef_sql(expression, sep)

    def join_sql(self, expression: exp.Join) -> str:
        if (
            expression.side == "LEFT"
            and not expression.args.get("on")
            and isinstance(expression.this, exp.Unnest)
        ):
            # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
            return super().join_sql(expression.on(exp.true()))

        return super().join_sql(expression)

    def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
        # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
        if expression.args.get("is_end_exclusive"):
            return rename_func("RANGE")(self, expression)

        return self.function_fallback_sql(expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        if isinstance(this, exp.Array):
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this)

            if this.is_type(exp.DataType.Type.MAP):
                bracket = f"({bracket})[1]"

        return bracket

    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        expression_sql = self.sql(expression, "expression")

        func = expression.this
        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"

    def length_sql(self, expression: exp.Length) -> str:
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
        varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

        case = (
            exp.case(self.func("TYPEOF", arg))
            .when(
                "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
            )  # anonymous to break length_sql recursion
            .when("'BLOB'", self.func("OCTET_LENGTH", blob))
        )

        return self.sql(case)

    def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
        this = expression.this
        key = expression.args.get("key")
        key_sql = key.name if isinstance(key, exp.Expression) else ""
        value_sql = self.sql(expression, "value")

        kv_sql = f"{key_sql} := {value_sql}"

        # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
        # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
        if isinstance(this, exp.Struct) and not this.expressions:
            return self.func("STRUCT_PACK", kv_sql)

        return self.func("STRUCT_INSERT", this, kv_sql)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        explode_array = expression.args.get("explode_array")
        if explode_array:
            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
            expression.expressions.append(
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
            )

            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
            alias = expression.args.get("alias")
            if alias:
                expression.set("alias", None)
                alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

            unnest_sql = super().unnest_sql(expression)
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

        return super().unnest_sql(expression)

    def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
        if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
            # DuckDB should render IGNORE NULLS only for the general-purpose
            # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
            return super().ignorenulls_sql(expression)

        return self.sql(expression, "this")

    def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
        this = self.sql(expression, "this")
        null_text = self.sql(expression, "null")

        if null_text:
            this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

        return self.func("ARRAY_TO_STRING", this, expression.expression)
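Two of the overrides above are easiest to see end to end; a small sketch whose output strings are indicative:

import sqlglot

# objectinsert_sql: inserting into an empty Snowflake object becomes STRUCT_PACK
print(sqlglot.transpile("SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1)", read="snowflake", write="duckdb")[0])

# interval_sql: WEEK intervals are rewritten as a multiple of DAY intervals
print(sqlglot.transpile("SELECT d + INTERVAL '2' WEEK FROM t", read="duckdb", write="duckdb")[0])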
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
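A brief sketch of the pretty and identify options above, using the module-level transpile helper; formatting details are indicative:

import sqlglot

sql = "SELECT a FROM t WHERE a > 1"
print(sqlglot.transpile(sql, read="duckdb", write="duckdb", pretty=True, identify=True)[0])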
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql