sqlglot.dialects.duckdb
```python
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    regexp_extract_sql,
    rename_func,
    str_position_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
)
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the two dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("DuckDB ARRAY_SORT does not support a comparator")
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if a cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_struct_cast = ancestor_cast and any(
        casted_type.is_type(exp.DataType.Type.STRUCT)
        for casted_type in ancestor_cast.find_all(exp.DataType)
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_struct_cast:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_struct_cast else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "ATTACH": TokenType.COMMAND,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
            "GENERATE_SERIES": _build_generate_series(),
            "RANGE": _build_generate_series(end_exclusive=True),
        }

        FUNCTIONS.pop("DATE_SUB")

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)
            if isinstance(bracket, exp.Bracket):
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"),
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
            exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)
```
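The helpers and transform tables above drive transpilation into DuckDB. A hedged illustration (the exact output formatting may vary slightly across sqlglot versions):

```python
import sqlglot

# BigQuery's inline STRUCT becomes a DuckDB struct literal via _struct_sql
print(sqlglot.transpile("SELECT STRUCT(1 AS a)", read="bigquery", write="duckdb")[0])
# SELECT {'a': 1}

# GENERATE_DATE_ARRAY is rewritten to GENERATE_SERIES and cast back to DATE[]
# by _generate_datetime_array_sql
print(
    sqlglot.transpile(
        "SELECT GENERATE_DATE_ARRAY('2020-01-01', '2020-01-05', INTERVAL 1 DAY)",
        read="bigquery",
        write="duckdb",
    )[0]
)
# Roughly: SELECT CAST(GENERATE_SERIES(CAST('2020-01-01' AS DATE),
#          CAST('2020-01-05' AS DATE), INTERVAL '1' DAY) AS DATE[])
```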
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
A NULL
arg in CONCAT
yields NULL
by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts e.g. in DuckDB. In dialects which don't support fixed size arrays such as Snowflake, this should be interpreted as a subscript/index operator
Specifies the strategy according to which identifiers should be normalized.
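The settings above can be observed end to end through sqlglot's top-level API. A minimal sketch with toy identifiers (t, a, b, x); printed SQL is indicative, not guaranteed across sqlglot versions:

    import sqlglot

    # ORDER BY ALL is valid DuckDB, so it survives a round trip through this dialect
    print(sqlglot.transpile("SELECT a, b FROM t ORDER BY ALL", read="duckdb", write="duckdb")[0])

    # x::INT[3] is parsed as a cast to a fixed-size array rather than a subscript
    print(repr(sqlglot.parse_one("SELECT x::INT[3]", read="duckdb")))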
def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    if isinstance(path, exp.Literal):
        # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
        # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
        # This check ensures we'll avoid trying to parse these as JSON paths, which can
        # either result in a noisy warning or in an invalid representation of the path.
        path_text = path.name
        if path_text.startswith("/") or "[#" in path_text:
            return path

    return super().to_json_path(path)
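In practice this means dollar-sign paths are parsed structurally, while JSON pointer paths pass through untouched. A small sketch with toy names (j, t); outputs are indicative:

    import sqlglot

    # '$.a' is parsed into sqlglot's structured JSON path representation
    print(sqlglot.transpile("SELECT j -> '$.a' FROM t", read="duckdb", write="duckdb")[0])

    # A JSON pointer path (leading '/') is returned as-is by to_json_path
    print(sqlglot.transpile("SELECT j -> '/a/0' FROM t", read="duckdb", write="duckdb")[0])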
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    HEREDOC_STRINGS = ["$"]

    HEREDOC_TAG_IS_IDENTIFIER = True
    HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "//": TokenType.DIV,
        "ATTACH": TokenType.COMMAND,
        "BINARY": TokenType.VARBINARY,
        "BITSTRING": TokenType.BIT,
        "BPCHAR": TokenType.TEXT,
        "CHAR": TokenType.TEXT,
        "CHARACTER VARYING": TokenType.TEXT,
        "EXCLUDE": TokenType.EXCEPT,
        "LOGICAL": TokenType.BOOLEAN,
        "ONLY": TokenType.ONLY,
        "PIVOT_WIDER": TokenType.PIVOT,
        "POSITIONAL": TokenType.POSITIONAL,
        "SIGNED": TokenType.INT,
        "STRING": TokenType.TEXT,
        "SUMMARIZE": TokenType.SUMMARIZE,
        "TIMESTAMP_S": TokenType.TIMESTAMP_S,
        "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
        "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
        "TIMESTAMP_US": TokenType.TIMESTAMP,
        "UBIGINT": TokenType.UBIGINT,
        "UINTEGER": TokenType.UINT,
        "USMALLINT": TokenType.USMALLINT,
        "UTINYINT": TokenType.UTINYINT,
        "VARCHAR": TokenType.TEXT,
    }
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }
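The keyword table above is easiest to verify with a round trip: DuckDB type aliases such as STRING collapse to TEXT, and // is kept as DuckDB's integer division operator. A minimal sketch with toy names (t, x); output indicative:

    import sqlglot

    # STRING tokenizes to TEXT; `//` maps to integer division (exp.IntDiv)
    print(sqlglot.transpile("SELECT CAST(x AS STRING), 7 // 2 FROM t", read="duckdb", write="duckdb")[0])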
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    BITWISE = {
        **parser.Parser.BITWISE,
        TokenType.TILDA: exp.RegexpLike,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARRAY_REVERSE_SORT": _build_sort_array_desc,
        "ARRAY_SORT": exp.SortArray.from_arg_list,
        "DATEDIFF": _build_date_diff,
        "DATE_DIFF": _build_date_diff,
        "DATE_TRUNC": date_trunc_to_time,
        "DATETRUNC": date_trunc_to_time,
        "DECODE": lambda args: exp.Decode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "ENCODE": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "EPOCH": exp.TimeToUnix.from_arg_list,
        "EPOCH_MS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "JSON": exp.ParseJSON.from_arg_list,
        "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
        "LIST_HAS": exp.ArrayContains.from_arg_list,
        "LIST_REVERSE_SORT": _build_sort_array_desc,
        "LIST_SORT": exp.SortArray.from_arg_list,
        "LIST_VALUE": lambda args: exp.Array(expressions=args),
        "MAKE_TIME": exp.TimeFromParts.from_arg_list,
        "MAKE_TIMESTAMP": _build_make_timestamp,
        "MEDIAN": lambda args: exp.PercentileCont(
            this=seq_get(args, 0), expression=exp.Literal.number(0.5)
        ),
        "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
        "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2),
            modifiers=seq_get(args, 3),
        ),
        "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
        "STRING_SPLIT": exp.Split.from_arg_list,
        "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "STRING_TO_ARRAY": exp.Split.from_arg_list,
        "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
        "STRUCT_PACK": exp.Struct.from_arg_list,
        "STR_SPLIT": exp.Split.from_arg_list,
        "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
        "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
        "UNNEST": exp.Explode.from_arg_list,
        "XOR": binary_from_function(exp.BitwiseXor),
        "GENERATE_SERIES": _build_generate_series(),
        "RANGE": _build_generate_series(end_exclusive=True),
    }

    FUNCTIONS.pop("DATE_SUB")

    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    FUNCTION_PARSERS.pop("DECODE")

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "MAP": lambda self: self._parse_map(),
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.SEMI,
        TokenType.ANTI,
    }

    PLACEHOLDER_PARSERS = {
        **parser.Parser.PLACEHOLDER_PARSERS,
        TokenType.PARAMETER: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    TYPE_CONVERTERS = {
        # https://duckdb.org/docs/sql/data_types/numeric
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
        # https://duckdb.org/docs/sql/data_types/text
        exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
    }

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        # https://duckdb.org/docs/sql/samples.html
        sample = super()._parse_table_sample(as_modifier=as_modifier)
        if sample and not sample.args.get("method"):
            if sample.args.get("size"):
                sample.set("method", exp.var("RESERVOIR"))
            else:
                sample.set("method", exp.var("SYSTEM"))

        return sample

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)
        if isinstance(bracket, exp.Bracket):
            bracket.set("returns_list_for_maps", True)

        return bracket

    def _parse_map(self) -> exp.ToMap | exp.Map:
        if self._match(TokenType.L_BRACE, advance=False):
            return self.expression(exp.ToMap, this=self._parse_bracket())

        args = self._parse_wrapped_csv(self._parse_assignment)
        return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_field_def()

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        if len(aggregations) == 1:
            return super()._pivot_column_names(aggregations)
        return pivot_column_names(aggregations, dialect="duckdb")
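_parse_table_sample above fills in DuckDB's documented defaults when no sampling method is given explicitly: RESERVOIR for a discrete row count, SYSTEM otherwise. A sketch of how that surfaces in the parsed tree, using a toy table t; the method names in the comments are the expected values, not asserted:

    import sqlglot
    from sqlglot import exp

    pct = sqlglot.parse_one("SELECT * FROM t USING SAMPLE 10%", read="duckdb")
    cnt = sqlglot.parse_one("SELECT * FROM t USING SAMPLE 50 ROWS", read="duckdb")
    print(pct.find(exp.TableSample).text("method"))  # SYSTEM
    print(cnt.find(exp.TableSample).text("method"))  # RESERVOIR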
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
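These options are forwarded from the top-level API, so a DuckDB parse can be made strict without instantiating a Parser directly. A minimal sketch; the exact error wording is illustrative:

    import sqlglot
    from sqlglot.errors import ErrorLevel, ParseError

    try:
        # missing table reference after FROM
        sqlglot.parse_one("SELECT * FROM", read="duckdb", error_level=ErrorLevel.RAISE)
    except ParseError as err:
        print(err)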
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    PARAMETER_TOKEN = "$"
    NAMED_PLACEHOLDER_TOKEN = "$"
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    LIMIT_FETCH = "LIMIT"
    STRUCT_DELIMITER = ("(", ")")
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    SEMI_ANTI_JOIN_WITH_SIDE = False
    TABLESAMPLE_KEYWORDS = "USING SAMPLE"
    TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_BRACKETED_KEY_SUPPORTED = False
    SUPPORTS_CREATE_TABLE_LIKE = False
    MULTI_ARG_DISTINCT = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    COPY_HAS_INTO_KEYWORD = False
    STAR_EXCEPT = "EXCLUDE"
    PAD_FILL_PATTERN_IS_REQUIRED = True
    ARRAY_CONCAT_IS_VAR_LEN = False

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.Array: inline_array_unless_query,
        exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"),
        exp.ArrayFilter: rename_func("LIST_FILTER"),
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
        exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
        exp.ArraySort: _array_sort_sql,
        exp.ArraySum: rename_func("LIST_SUM"),
        exp.BitwiseXor: rename_func("XOR"),
        exp.CommentColumnConstraint: no_comment_column_constraint_sql,
        exp.CurrentDate: lambda *_: "CURRENT_DATE",
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.DataType: _datatype_sql,
        exp.Date: _date_sql,
        exp.DateAdd: _date_delta_sql,
        exp.DateFromParts: rename_func("MAKE_DATE"),
        exp.DateSub: _date_delta_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.Datetime: no_datetime_sql,
        exp.DatetimeSub: _date_delta_sql,
        exp.DatetimeAdd: _date_delta_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
        exp.DiToDate: lambda self,
        e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
        exp.GenerateDateArray: _generate_datetime_array_sql,
        exp.GenerateTimestampArray: _generate_datetime_array_sql,
        exp.Explode: rename_func("UNNEST"),
        exp.IntDiv: lambda self, e: self.binary(e, "//"),
        exp.IsInf: rename_func("ISINF"),
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: _arrow_json_extract_sql,
        exp.JSONExtractScalar: _arrow_json_extract_sql,
        exp.JSONFormat: _json_format_sql,
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.MonthsBetween: lambda self, e: self.func(
            "DATEDIFF",
            "'month'",
            exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
            exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
        ),
        exp.PercentileCont: rename_func("QUANTILE_CONT"),
        exp.PercentileDisc: rename_func("QUANTILE_DISC"),
        # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
        # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
        exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: lambda self, e: self.func(
            "REGEXP_REPLACE",
            e.this,
            e.expression,
            e.args.get("replacement"),
            e.args.get("modifiers"),
        ),
        exp.RegexpLike: rename_func("REGEXP_MATCHES"),
        exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
        exp.Return: lambda self, e: self.sql(e, "this"),
        exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
        exp.Rand: rename_func("RANDOM"),
        exp.SafeDivide: no_safe_divide_sql,
        exp.Split: rename_func("STR_SPLIT"),
        exp.SortArray: _sort_array_sql,
        exp.StrPosition: str_position_sql,
        exp.StrToUnix: lambda self, e: self.func(
            "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
        ),
        exp.Struct: _struct_sql,
        exp.TimeAdd: _date_delta_sql,
        exp.Time: no_time_sql,
        exp.TimeDiff: _timediff_sql,
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampDiff: lambda self, e: self.func(
            "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
        ),
        exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("EPOCH"),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _date_delta_sql,
        exp.TsOrDsDiff: lambda self, e: self.func(
            "DATE_DIFF",
            f"'{e.args.get('unit') or 'DAY'}'",
            exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
            exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
        ),
        exp.UnixToStr: lambda self, e: self.func(
            "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
        ),
        exp.DatetimeTrunc: lambda self, e: self.func(
            "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
        ),
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: bool_xor_sql,
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BINARY: "BLOB",
        exp.DataType.Type.BPCHAR: "TEXT",
        exp.DataType.Type.CHAR: "TEXT",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.NCHAR: "TEXT",
        exp.DataType.Type.NVARCHAR: "TEXT",
        exp.DataType.Type.UINT: "UINTEGER",
        exp.DataType.Type.VARBINARY: "BLOB",
        exp.DataType.Type.ROWVERSION: "BLOB",
        exp.DataType.Type.VARCHAR: "TEXT",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
        exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
        exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
    }

    # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
    RESERVED_KEYWORDS = {
        "array",
        "analyse",
        "union",
        "all",
        "when",
        "in_p",
        "default",
        "create_p",
        "window",
        "asymmetric",
        "to",
        "else",
        "localtime",
        "from",
        "end_p",
        "select",
        "current_date",
        "foreign",
        "with",
        "grant",
        "session_user",
        "or",
        "except",
        "references",
        "fetch",
        "limit",
        "group_p",
        "leading",
        "into",
        "collate",
        "offset",
        "do",
        "then",
        "localtimestamp",
        "check_p",
        "lateral_p",
        "current_role",
        "where",
        "asc_p",
        "placing",
        "desc_p",
        "user",
        "unique",
        "initially",
        "column",
        "both",
        "some",
        "as",
        "any",
        "only",
        "deferrable",
        "null_p",
        "current_time",
        "true_p",
        "table",
        "case",
        "trailing",
        "variadic",
        "for",
        "on",
        "distinct",
        "false_p",
        "not",
        "constraint",
        "current_timestamp",
        "returning",
        "primary",
        "intersect",
        "having",
        "analyze",
        "current_user",
        "and",
        "cast",
        "symmetric",
        "using",
        "order",
        "current_catalog",
    }

    UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

    # DuckDB doesn't generally support CREATE TABLE .. properties
    # https://duckdb.org/docs/sql/statements/create_table.html
    PROPERTIES_LOCATION = {
        prop: exp.Properties.Location.UNSUPPORTED
        for prop in generator.Generator.PROPERTIES_LOCATION
    }

    # There are a few exceptions (e.g. temporary tables) which are supported or
    # can be transpiled to DuckDB, so we explicitly override them accordingly
    PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
    PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
    PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

    def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
        return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

    def strtotime_sql(self, expression: exp.StrToTime) -> str:
        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
        return str_to_time_sql(self, expression)

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        if expression.args.get("safe"):
            formatted_time = self.format_time(expression)
            return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
        return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

    def parsejson_sql(self, expression: exp.ParseJSON) -> str:
        arg = expression.this
        if expression.args.get("safe"):
            return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
        return self.func("JSON", arg)

    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        nano = expression.args.get("nano")
        if nano is not None:
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        sec = expression.args["sec"]

        milli = expression.args.get("milli")
        if milli is not None:
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)

    def tablesample_sql(
        self,
        expression: exp.TableSample,
        tablesample_keyword: t.Optional[str] = None,
    ) -> str:
        if not isinstance(expression.parent, exp.Select):
            # This sample clause only applies to a single source, not the entire resulting relation
            tablesample_keyword = "TABLESAMPLE"

        if expression.args.get("size"):
            method = expression.args.get("method")
            if method and method.name.upper() != "RESERVOIR":
                self.unsupported(
                    f"Sampling method {method} is not supported with a discrete sample count, "
                    "defaulting to reservoir sampling"
                )
                expression.set("method", exp.var("RESERVOIR"))

        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

    def interval_sql(self, expression: exp.Interval) -> str:
        multiplier: t.Optional[int] = None
        unit = expression.text("unit").lower()

        if unit.startswith("week"):
            multiplier = 7
        if unit.startswith("quarter"):
            multiplier = 90

        if multiplier:
            return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

        return super().interval_sql(expression)

    def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
        if isinstance(expression.parent, exp.UserDefinedFunction):
            return self.sql(expression, "this")
        return super().columndef_sql(expression, sep)

    def join_sql(self, expression: exp.Join) -> str:
        if (
            expression.side == "LEFT"
            and not expression.args.get("on")
            and isinstance(expression.this, exp.Unnest)
        ):
            # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
            # DuckDB doesn't, but we can just add a dummy ON clause that is always true
            return super().join_sql(expression.on(exp.true()))

        return super().join_sql(expression)

    def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
        # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
        if expression.args.get("is_end_exclusive"):
            return rename_func("RANGE")(self, expression)

        return self.function_fallback_sql(expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        if isinstance(this, exp.Array):
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this)

            if this.is_type(exp.DataType.Type.MAP):
                bracket = f"({bracket})[1]"

        return bracket

    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        expression_sql = self.sql(expression, "expression")

        func = expression.this
        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"

    def length_sql(self, expression: exp.Length) -> str:
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        if not arg.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
        varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

        case = (
            exp.case(self.func("TYPEOF", arg))
            .when(
                "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
            )  # anonymous to break length_sql recursion
            .when("'BLOB'", self.func("OCTET_LENGTH", blob))
        )

        return self.sql(case)

    def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
        this = expression.this
        key = expression.args.get("key")
        key_sql = key.name if isinstance(key, exp.Expression) else ""
        value_sql = self.sql(expression, "value")

        kv_sql = f"{key_sql} := {value_sql}"

        # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
        # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
        if isinstance(this, exp.Struct) and not this.expressions:
            return self.func("STRUCT_PACK", kv_sql)

        return self.func("STRUCT_INSERT", this, kv_sql)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        explode_array = expression.args.get("explode_array")
        if explode_array:
            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
            expression.expressions.append(
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
            )

            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
            alias = expression.args.get("alias")
            if alias:
                expression.set("alias", None)
                alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

            unnest_sql = super().unnest_sql(expression)
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

        return super().unnest_sql(expression)
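Many of these transforms are most visible when converting from another dialect. A couple of illustrative round trips with toy names (t, a, x); outputs are indicative, assuming a recent sqlglot:

    import sqlglot

    # STAR_EXCEPT = "EXCLUDE": BigQuery's SELECT * EXCEPT (...) becomes EXCLUDE (...)
    print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="duckdb")[0])

    # interval_sql: WEEK intervals are rewritten as day multiples, e.g. (7 * INTERVAL '1' DAY)
    print(sqlglot.transpile("SELECT CAST(x AS DATE) + INTERVAL '1' WEEK", write="duckdb")[0])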
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
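For example, generation options such as pretty are forwarded straight through sqlglot.transpile to this Generator (toy names t, a, b, x):

    import sqlglot

    # Pretty-printed DuckDB output; identify, normalize, etc. work the same way
    print(sqlglot.transpile("SELECT a, b FROM t WHERE x = 1", write="duckdb", pretty=True)[0])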
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql