sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    regexp_extract_sql,
    rename_func,
    str_position_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
)
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("DuckDB ARRAY_SORT does not support a comparator")
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if a cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_struct_cast = ancestor_cast and any(
        casted_type.is_type(exp.DataType.Type.STRUCT)
        for casted_type in ancestor_cast.find_all(exp.DataType)
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_struct_cast:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_struct_cast else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "ATTACH": TokenType.COMMAND,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
            "GENERATE_SERIES": _build_generate_series(),
            "RANGE": _build_generate_series(end_exclusive=True),
        }

        FUNCTIONS.pop("DATE_SUB")

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)
            if isinstance(bracket, exp.Bracket):
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"),
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
            exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array", "analyse", "union", "all", "when", "in_p", "default",
            "create_p", "window", "asymmetric", "to", "else", "localtime",
            "from", "end_p", "select", "current_date", "foreign", "with",
            "grant", "session_user", "or", "except", "references", "fetch",
            "limit", "group_p", "leading", "into", "collate", "offset", "do",
            "then", "localtimestamp", "check_p", "lateral_p", "current_role",
            "where", "asc_p", "placing", "desc_p", "user", "unique",
            "initially", "column", "both", "some", "as", "any", "only",
            "deferrable", "null_p", "current_time", "true_p", "table", "case",
            "trailing", "variadic", "for", "on", "distinct", "false_p", "not",
            "constraint", "current_timestamp", "returning", "primary",
            "intersect", "having", "analyze", "current_user", "and", "cast",
            "symmetric", "using", "order", "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            sep: str = " AS ",
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(
                expression, sep=sep, tablesample_keyword=tablesample_keyword
            )

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty, e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake,
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)
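The inline comments above describe several cross-dialect rewrites (BigQuery STRUCT canonicalization, end-exclusive RANGE, and so on). A minimal, non-authoritative sketch of how they surface through sqlglot's public transpile API, assuming a recent sqlglot version; the column names are illustrative and the outputs in comments are expected shapes rather than guaranteed strings:

import sqlglot

# BigQuery inline STRUCT literals become DuckDB struct literals (_struct_sql)
print(sqlglot.transpile("SELECT STRUCT(1 AS a, 'x' AS b)", read="bigquery", write="duckdb")[0])
# expected: SELECT {'a': 1, 'b': 'x'}

# RANGE is parsed with is_end_exclusive=True (_build_generate_series), so it
# round-trips as RANGE instead of being folded into GENERATE_SERIES
print(sqlglot.transpile("SELECT RANGE(1, 5)", read="duckdb", write="duckdb")[0])
# expected: SELECT RANGE(1, 5)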
class DuckDB(sqlglot.dialects.dialect.Dialect):
NULL_ORDERING: Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".

CONCAT_COALESCE: A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.

SUPPORTS_ORDER_BY_ALL: Whether ORDER BY ALL is supported (expands to all the selected columns), as in DuckDB and Spark3/Databricks.

SUPPORTS_FIXED_SIZE_ARRAYS: Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.

NORMALIZATION_STRATEGY: Specifies the strategy according to which identifiers should be normalized.
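These settings can be inspected directly on the dialect. A minimal sketch (the values noted in the comments reflect sqlglot's DuckDB dialect at the time of writing and may change between versions):

import sqlglot
from sqlglot.dialects.duckdb import DuckDB

dialect = DuckDB()

print(dialect.NULL_ORDERING)          # "nulls_are_last"
print(dialect.CONCAT_COALESCE)        # True: NULL args to CONCAT behave like ''
print(dialect.SUPPORTS_ORDER_BY_ALL)  # True: ORDER BY ALL is valid DuckDB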
def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    if isinstance(path, exp.Literal):
        # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
        # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
        # This check ensures we'll avoid trying to parse these as JSON paths, which can
        # either result in a noisy warning or in an invalid representation of the path.
        path_text = path.name
        if path_text.startswith("/") or "[#" in path_text:
            return path

    return super().to_json_path(path)
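For example (a sketch; the exact tree shapes can vary across sqlglot versions), a JSONPath literal is parsed into structured path nodes, while a JSON pointer literal is passed through untouched:

import sqlglot

# '$.a' is a JSONPath, so it is parsed into JSONPath nodes.
jsonpath_tree = sqlglot.parse_one("SELECT x -> '$.a' FROM t", read="duckdb")

# '/a/0' uses DuckDB's JSON pointer syntax, so to_json_path returns the
# string literal unchanged instead of parsing it.
pointer_tree = sqlglot.parse_one("SELECT x -> '/a/0' FROM t", read="duckdb")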
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    HEREDOC_STRINGS = ["$"]

    HEREDOC_TAG_IS_IDENTIFIER = True
    HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "//": TokenType.DIV,
        "ATTACH": TokenType.COMMAND,
        "BINARY": TokenType.VARBINARY,
        "BITSTRING": TokenType.BIT,
        "BPCHAR": TokenType.TEXT,
        "CHAR": TokenType.TEXT,
        "CHARACTER VARYING": TokenType.TEXT,
        "EXCLUDE": TokenType.EXCEPT,
        "LOGICAL": TokenType.BOOLEAN,
        "ONLY": TokenType.ONLY,
        "PIVOT_WIDER": TokenType.PIVOT,
        "POSITIONAL": TokenType.POSITIONAL,
        "SIGNED": TokenType.INT,
        "STRING": TokenType.TEXT,
        "SUMMARIZE": TokenType.SUMMARIZE,
        "TIMESTAMP_S": TokenType.TIMESTAMP_S,
        "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
        "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
        "TIMESTAMP_US": TokenType.TIMESTAMP,
        "UBIGINT": TokenType.UBIGINT,
        "UINTEGER": TokenType.UINT,
        "USMALLINT": TokenType.USMALLINT,
        "UTINYINT": TokenType.UTINYINT,
        "VARCHAR": TokenType.TEXT,
    }
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }
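The keyword remapping is visible as soon as a statement is tokenized. A small sketch (the printed token representation may differ between versions):

from sqlglot.dialects.duckdb import DuckDB

# UBIGINT is recognized via the KEYWORDS override above, and $param is
# picked up through the "$" entry in SINGLE_TOKENS.
for token in DuckDB().tokenize("SELECT CAST($param AS UBIGINT)"):
    print(token.token_type, token.text)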
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    BITWISE = {
        **parser.Parser.BITWISE,
        TokenType.TILDA: exp.RegexpLike,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARRAY_REVERSE_SORT": _build_sort_array_desc,
        "ARRAY_SORT": exp.SortArray.from_arg_list,
        "DATEDIFF": _build_date_diff,
        "DATE_DIFF": _build_date_diff,
        "DATE_TRUNC": date_trunc_to_time,
        "DATETRUNC": date_trunc_to_time,
        "DECODE": lambda args: exp.Decode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "ENCODE": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "EPOCH": exp.TimeToUnix.from_arg_list,
        "EPOCH_MS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "JSON": exp.ParseJSON.from_arg_list,
        "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
        "LIST_HAS": exp.ArrayContains.from_arg_list,
        "LIST_REVERSE_SORT": _build_sort_array_desc,
        "LIST_SORT": exp.SortArray.from_arg_list,
        "LIST_VALUE": lambda args: exp.Array(expressions=args),
        "MAKE_TIME": exp.TimeFromParts.from_arg_list,
        "MAKE_TIMESTAMP": _build_make_timestamp,
        "MEDIAN": lambda args: exp.PercentileCont(
            this=seq_get(args, 0), expression=exp.Literal.number(0.5)
        ),
        "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
        "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2),
            modifiers=seq_get(args, 3),
        ),
        "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
        "STRING_SPLIT": exp.Split.from_arg_list,
        "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "STRING_TO_ARRAY": exp.Split.from_arg_list,
        "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
        "STRUCT_PACK": exp.Struct.from_arg_list,
        "STR_SPLIT": exp.Split.from_arg_list,
        "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
        "UNNEST": exp.Explode.from_arg_list,
        "XOR": binary_from_function(exp.BitwiseXor),
        "GENERATE_SERIES": _build_generate_series(),
        "RANGE": _build_generate_series(end_exclusive=True),
    }

    FUNCTIONS.pop("DATE_SUB")

    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    FUNCTION_PARSERS.pop("DECODE")

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "MAP": lambda self: self._parse_map(),
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.SEMI,
        TokenType.ANTI,
    }

    PLACEHOLDER_PARSERS = {
        **parser.Parser.PLACEHOLDER_PARSERS,
        TokenType.PARAMETER: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    TYPE_CONVERTERS = {
        # https://duckdb.org/docs/sql/data_types/numeric
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
        # https://duckdb.org/docs/sql/data_types/text
        exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
    }

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        # https://duckdb.org/docs/sql/samples.html
        sample = super()._parse_table_sample(as_modifier=as_modifier)
        if sample and not sample.args.get("method"):
            if sample.args.get("size"):
                sample.set("method", exp.var("RESERVOIR"))
            else:
                sample.set("method", exp.var("SYSTEM"))

        return sample

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)
        if isinstance(bracket, exp.Bracket):
            bracket.set("returns_list_for_maps", True)

        return bracket

    def _parse_map(self) -> exp.ToMap | exp.Map:
        if self._match(TokenType.L_BRACE, advance=False):
            return self.expression(exp.ToMap, this=self._parse_bracket())

        args = self._parse_wrapped_csv(self._parse_assignment)
        return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_field_def()

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        if len(aggregations) == 1:
            return super()._pivot_column_names(aggregations)
        return pivot_column_names(aggregations, dialect="duckdb")
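As an illustration of the MAP handling above (a sketch; the resulting node types are taken from the code and may shift across versions), DuckDB's brace syntax and its two-list constructor parse into different expressions:

import sqlglot

# MAP {'k': 1} takes the L_BRACE branch of _parse_map and becomes exp.ToMap.
brace_map = sqlglot.parse_one("SELECT MAP {'k': 1}", read="duckdb")

# MAP(['k'], [1]) takes the wrapped-csv branch and becomes exp.Map.
list_map = sqlglot.parse_one("SELECT MAP(['k'], [1])", read="duckdb")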
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
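These arguments are forwarded through the top-level parsing helpers, so they can be set without constructing a Parser directly. A minimal sketch:

from sqlglot import parse_one
from sqlglot.errors import ErrorLevel

# error_level is passed through to the DuckDB Parser: IGNORE returns a
# best-effort tree for malformed input instead of raising immediately.
tree = parse_one("SELECT 1 +", read="duckdb", error_level=ErrorLevel.IGNORE)
print(tree)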
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    PARAMETER_TOKEN = "$"
    NAMED_PLACEHOLDER_TOKEN = "$"
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    LIMIT_FETCH = "LIMIT"
    STRUCT_DELIMITER = ("(", ")")
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    SEMI_ANTI_JOIN_WITH_SIDE = False
    TABLESAMPLE_KEYWORDS = "USING SAMPLE"
    TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_BRACKETED_KEY_SUPPORTED = False
    SUPPORTS_CREATE_TABLE_LIKE = False
    MULTI_ARG_DISTINCT = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    COPY_HAS_INTO_KEYWORD = False
    STAR_EXCEPT = "EXCLUDE"
    PAD_FILL_PATTERN_IS_REQUIRED = True
    ARRAY_CONCAT_IS_VAR_LEN = False

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.Array: inline_array_unless_query,
        exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"),
        exp.ArrayFilter: rename_func("LIST_FILTER"),
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
        exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
        exp.ArraySort: _array_sort_sql,
        exp.ArraySum: rename_func("LIST_SUM"),
        exp.BitwiseXor: rename_func("XOR"),
        exp.CommentColumnConstraint: no_comment_column_constraint_sql,
        exp.CurrentDate: lambda *_: "CURRENT_DATE",
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.DataType: _datatype_sql,
        exp.Date: _date_sql,
        exp.DateAdd: _date_delta_sql,
        exp.DateFromParts: rename_func("MAKE_DATE"),
        exp.DateSub: _date_delta_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.Datetime: no_datetime_sql,
        exp.DatetimeSub: _date_delta_sql,
        exp.DatetimeAdd: _date_delta_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
        exp.DiToDate: lambda self,
        e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
        exp.GenerateDateArray: _generate_datetime_array_sql,
        exp.GenerateTimestampArray: _generate_datetime_array_sql,
        exp.Explode: rename_func("UNNEST"),
        exp.IntDiv: lambda self, e: self.binary(e, "//"),
        exp.IsInf: rename_func("ISINF"),
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: _arrow_json_extract_sql,
        exp.JSONExtractScalar: _arrow_json_extract_sql,
        exp.JSONFormat: _json_format_sql,
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.MonthsBetween: lambda self, e: self.func(
            "DATEDIFF",
            "'month'",
            exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
            exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
        ),
        exp.PercentileCont: rename_func("QUANTILE_CONT"),
        exp.PercentileDisc: rename_func("QUANTILE_DISC"),
        # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
        # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
        exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: lambda self, e: self.func(
            "REGEXP_REPLACE",
            e.this,
            e.expression,
            e.args.get("replacement"),
            e.args.get("modifiers"),
        ),
        exp.RegexpLike: rename_func("REGEXP_MATCHES"),
        exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
        exp.Return: lambda self, e: self.sql(e, "this"),
        exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
        exp.Rand: rename_func("RANDOM"),
        exp.SafeDivide: no_safe_divide_sql,
        exp.Split: rename_func("STR_SPLIT"),
        exp.SortArray: _sort_array_sql,
        exp.StrPosition: str_position_sql,
        exp.StrToUnix: lambda self, e: self.func(
            "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
        ),
        exp.Struct: _struct_sql,
        exp.TimeAdd: _date_delta_sql,
        exp.Time: no_time_sql,
        exp.TimeDiff: _timediff_sql,
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampDiff: lambda self, e: self.func(
            "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
        ),
        exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("EPOCH"),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _date_delta_sql,
        exp.TsOrDsDiff: lambda self, e: self.func(
            "DATE_DIFF",
            f"'{e.args.get('unit') or 'DAY'}'",
            exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
            exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
        ),
        exp.UnixToStr: lambda self, e: self.func(
            "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
        ),
        exp.DatetimeTrunc: lambda self, e: self.func(
            "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
        ),
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: bool_xor_sql,
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BINARY: "BLOB",
        exp.DataType.Type.BPCHAR: "TEXT",
        exp.DataType.Type.CHAR: "TEXT",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.NCHAR: "TEXT",
        exp.DataType.Type.NVARCHAR: "TEXT",
        exp.DataType.Type.UINT: "UINTEGER",
        exp.DataType.Type.VARBINARY: "BLOB",
        exp.DataType.Type.ROWVERSION: "BLOB",
        exp.DataType.Type.VARCHAR: "TEXT",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
        exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
        exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
    }

    # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
    RESERVED_KEYWORDS = {
        "array", "analyse", "union", "all", "when", "in_p", "default", "create_p",
        "window", "asymmetric", "to", "else", "localtime", "from", "end_p", "select",
        "current_date", "foreign", "with", "grant", "session_user", "or", "except",
        "references", "fetch", "limit", "group_p", "leading", "into", "collate",
        "offset", "do", "then", "localtimestamp", "check_p", "lateral_p",
        "current_role", "where", "asc_p", "placing", "desc_p", "user", "unique",
        "initially", "column", "both", "some", "as", "any", "only", "deferrable",
        "null_p", "current_time", "true_p", "table", "case", "trailing", "variadic",
        "for", "on", "distinct", "false_p", "not", "constraint", "current_timestamp",
        "returning", "primary", "intersect", "having", "analyze", "current_user",
        "and", "cast", "symmetric", "using", "order", "current_catalog",
    }

    UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

    # DuckDB doesn't generally support CREATE TABLE .. properties
    # https://duckdb.org/docs/sql/statements/create_table.html
    PROPERTIES_LOCATION = {
        prop: exp.Properties.Location.UNSUPPORTED
        for prop in generator.Generator.PROPERTIES_LOCATION
    }

    # There are a few exceptions (e.g. temporary tables) which are supported or
    # can be transpiled to DuckDB, so we explicitly override them accordingly
    PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
    PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
    PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

    def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
        return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

    def strtotime_sql(self, expression: exp.StrToTime) -> str:
        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
        return str_to_time_sql(self, expression)

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
        return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

    def parsejson_sql(self, expression: exp.ParseJSON) -> str:
        arg = expression.this
        if expression.args.get("safe"):
            return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
        return self.func("JSON", arg)

    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        nano = expression.args.get("nano")
        if nano is not None:
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        sec = expression.args["sec"]

        milli = expression.args.get("milli")
        if milli is not None:
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)

    def tablesample_sql(
        self,
        expression: exp.TableSample,
        sep: str = " AS ",
        tablesample_keyword: t.Optional[str] = None,
    ) -> str:
        if not isinstance(expression.parent, exp.Select):
            # This sample clause only applies to a single source, not the entire resulting relation
            tablesample_keyword = "TABLESAMPLE"

        if expression.args.get("size"):
            method = expression.args.get("method")
            if method and method.name.upper() != "RESERVOIR":
                self.unsupported(
                    f"Sampling method {method} is not supported with a discrete sample count, "
                    "defaulting to reservoir sampling"
                )
                expression.set("method", exp.var("RESERVOIR"))

        return super().tablesample_sql(
            expression, sep=sep, tablesample_keyword=tablesample_keyword
        )

    def interval_sql(self, expression: exp.Interval) -> str:
        multiplier: t.Optional[int] = None
        unit = expression.text("unit").lower()

        if unit.startswith("week"):
            multiplier = 7
        if unit.startswith("quarter"):
            multiplier = 90

        if multiplier:
            return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

        return super().interval_sql(expression)

    def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
        if isinstance(expression.parent, exp.UserDefinedFunction):
            return self.sql(expression, "this")
        return super().columndef_sql(expression, sep)

    def join_sql(self, expression: exp.Join) -> str:
        if (
            expression.side == "LEFT"
            and not expression.args.get("on")
            and isinstance(expression.this, exp.Unnest)
        ):
            # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
            return super().join_sql(expression.on(exp.true()))

        return super().join_sql(expression)

    def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
        # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
        if expression.args.get("is_end_exclusive"):
            return rename_func("RANGE")(self, expression)

        return self.function_fallback_sql(expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        if isinstance(this, exp.Array):
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this)

            if this.is_type(exp.DataType.Type.MAP):
                bracket = f"({bracket})[1]"

        return bracket

    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        expression_sql = self.sql(expression, "expression")

        func = expression.this
        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"

    def length_sql(self, expression: exp.Length) -> str:
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
        varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

        case = (
            exp.case(self.func("TYPEOF", arg))
            .when(
                "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
            )  # anonymous to break length_sql recursion
            .when("'BLOB'", self.func("OCTET_LENGTH", blob))
        )

        return self.sql(case)

    def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
        this = expression.this
        key = expression.args.get("key")
        key_sql = key.name if isinstance(key, exp.Expression) else ""
        value_sql = self.sql(expression, "value")

        kv_sql = f"{key_sql} := {value_sql}"

        # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
        # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
        if isinstance(this, exp.Struct) and not this.expressions:
            return self.func("STRUCT_PACK", kv_sql)

        return self.func("STRUCT_INSERT", this, kv_sql)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        explode_array = expression.args.get("explode_array")
        if explode_array:
            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
            expression.expressions.append(
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
            )

            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
            alias = expression.args.get("alias")
            if alias:
                expression.set("alias", None)
                alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

            unnest_sql = super().unnest_sql(expression)
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

        return super().unnest_sql(expression)
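Two of the overrides above in action; a sketch, with the exact generated strings depending on the installed sqlglot version:

import sqlglot

# interval_sql: WEEK is rewritten as a multiple of days,
# roughly "(7 * INTERVAL '2' DAY)".
print(sqlglot.transpile("SELECT INTERVAL '2' WEEK", write="duckdb")[0])

# join_sql: a BigQuery-style LEFT JOIN UNNEST without an ON clause gets a
# dummy ON TRUE predicate so the output is valid DuckDB.
print(
    sqlglot.transpile(
        "SELECT * FROM t LEFT JOIN UNNEST(t.arr) AS x",
        read="bigquery",
        write="duckdb",
    )[0]
)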
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
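These options surface through the usual top-level entry points; for instance (a minimal sketch):

import sqlglot

# pretty=True and identify=True are forwarded to the DuckDB Generator.
print(
    sqlglot.transpile(
        "SELECT a, b FROM t WHERE a > 0",
        write="duckdb",
        pretty=True,
        identify=True,
    )[0]
)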
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql