sqlglot.dialects.clickhouse
1from __future__ import annotations 2 3import typing as t 4import datetime 5 6from sqlglot import exp, generator, parser, tokens 7from sqlglot.dialects.dialect import ( 8 Dialect, 9 NormalizationStrategy, 10 arg_max_or_min_no_count, 11 build_date_delta, 12 build_formatted_time, 13 inline_array_sql, 14 json_extract_segments, 15 json_path_key_only_name, 16 no_pivot_sql, 17 build_json_extract_path, 18 rename_func, 19 sha256_sql, 20 var_map_sql, 21 timestamptrunc_sql, 22 unit_to_var, 23 trim_sql, 24) 25from sqlglot.generator import Generator 26from sqlglot.helper import is_int, seq_get 27from sqlglot.tokens import Token, TokenType 28 29DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd] 30 31 32def _build_date_format(args: t.List) -> exp.TimeToStr: 33 expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args) 34 35 timezone = seq_get(args, 2) 36 if timezone: 37 expr.set("zone", timezone) 38 39 return expr 40 41 42def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str: 43 scale = expression.args.get("scale") 44 timestamp = expression.this 45 46 if scale in (None, exp.UnixToTime.SECONDS): 47 return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 48 if scale == exp.UnixToTime.MILLIS: 49 return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 50 if scale == exp.UnixToTime.MICROS: 51 return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 52 if scale == exp.UnixToTime.NANOS: 53 return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 54 55 return self.func( 56 "fromUnixTimestamp", 57 exp.cast( 58 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 59 ), 60 ) 61 62 63def _lower_func(sql: str) -> str: 64 index = sql.index("(") 65 return sql[:index].lower() + sql[index:] 66 67 68def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str: 69 quantile = expression.args["quantile"] 70 args = f"({self.sql(expression, 'this')})" 71 72 if isinstance(quantile, exp.Array): 73 func = self.func("quantiles", *quantile) 74 else: 75 func = self.func("quantile", quantile) 76 77 return func + args 78 79 80def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc: 81 if len(args) == 1: 82 return exp.CountIf(this=seq_get(args, 0)) 83 84 return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If")) 85 86 87def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous: 88 if len(args) == 3: 89 return exp.Anonymous(this="STR_TO_DATE", expressions=args) 90 91 strtodate = exp.StrToDate.from_arg_list(args) 92 return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME)) 93 94 95def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]: 96 def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str: 97 if not expression.unit: 98 return rename_func(name)(self, expression) 99 100 return self.func( 101 name, 102 unit_to_var(expression), 103 expression.expression, 104 expression.this, 105 ) 106 107 return _delta_sql 108 109 110def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime): 111 tz = expression.args.get("zone") 112 datatype = exp.DataType.build(exp.DataType.Type.TIMESTAMP) 113 ts = expression.this 114 if tz: 115 # build a datatype that encodes the timezone as a type parameter, eg DateTime('America/Los_Angeles') 116 datatype = exp.DataType.build( 117 exp.DataType.Type.TIMESTAMPTZ, # Type.TIMESTAMPTZ maps to DateTime 118 expressions=[exp.DataTypeParam(this=tz)], 119 ) 120 121 if isinstance(ts, exp.Literal): 122 # strip the timezone out of the literal, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14' 123 # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if it's part of the timestamp string 124 ts_without_tz = ( 125 datetime.datetime.fromisoformat(ts.name).replace(tzinfo=None).isoformat(sep=" ") 126 ) 127 ts = exp.Literal.string(ts_without_tz) 128 129 return self.sql(exp.cast(ts, datatype, dialect=self.dialect)) 130 131 132class ClickHouse(Dialect): 133 NORMALIZE_FUNCTIONS: bool | str = False 134 NULL_ORDERING = "nulls_are_last" 135 SUPPORTS_USER_DEFINED_TYPES = False 136 SAFE_DIVISION = True 137 LOG_BASE_FIRST: t.Optional[bool] = None 138 FORCE_EARLY_ALIAS_REF_EXPANSION = True 139 140 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 141 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 142 143 UNESCAPED_SEQUENCES = { 144 "\\0": "\0", 145 } 146 147 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 148 149 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 150 exp.Except: False, 151 exp.Intersect: False, 152 exp.Union: None, 153 } 154 155 class Tokenizer(tokens.Tokenizer): 156 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 157 IDENTIFIERS = ['"', "`"] 158 STRING_ESCAPES = ["'", "\\"] 159 BIT_STRINGS = [("0b", "")] 160 HEX_STRINGS = [("0x", ""), ("0X", "")] 161 HEREDOC_STRINGS = ["$"] 162 163 KEYWORDS = { 164 **tokens.Tokenizer.KEYWORDS, 165 "ATTACH": TokenType.COMMAND, 166 "DATE32": TokenType.DATE32, 167 "DATETIME64": TokenType.DATETIME64, 168 "DICTIONARY": TokenType.DICTIONARY, 169 "ENUM8": TokenType.ENUM8, 170 "ENUM16": TokenType.ENUM16, 171 "FINAL": TokenType.FINAL, 172 "FIXEDSTRING": TokenType.FIXEDSTRING, 173 "FLOAT32": TokenType.FLOAT, 174 "FLOAT64": TokenType.DOUBLE, 175 "GLOBAL": TokenType.GLOBAL, 176 "INT256": TokenType.INT256, 177 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 178 "MAP": TokenType.MAP, 179 "NESTED": TokenType.NESTED, 180 "SAMPLE": TokenType.TABLE_SAMPLE, 181 "TUPLE": TokenType.STRUCT, 182 "UINT128": TokenType.UINT128, 183 "UINT16": TokenType.USMALLINT, 184 "UINT256": TokenType.UINT256, 185 "UINT32": TokenType.UINT, 186 "UINT64": TokenType.UBIGINT, 187 "UINT8": TokenType.UTINYINT, 188 "IPV4": TokenType.IPV4, 189 "IPV6": TokenType.IPV6, 190 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 191 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 192 "SYSTEM": TokenType.COMMAND, 193 "PREWHERE": TokenType.PREWHERE, 194 } 195 KEYWORDS.pop("/*+") 196 197 SINGLE_TOKENS = { 198 **tokens.Tokenizer.SINGLE_TOKENS, 199 "$": TokenType.HEREDOC_STRING, 200 } 201 202 class Parser(parser.Parser): 203 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 204 # * select x from t1 union all select x from t2 limit 1; 205 # * select x from t1 union all (select x from t2 limit 1); 206 MODIFIERS_ATTACHED_TO_SET_OP = False 207 INTERVAL_SPANS = False 208 209 FUNCTIONS = { 210 **parser.Parser.FUNCTIONS, 211 "ANY": exp.AnyValue.from_arg_list, 212 "ARRAYSUM": exp.ArraySum.from_arg_list, 213 "COUNTIF": _build_count_if, 214 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 215 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 217 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATE_FORMAT": _build_date_format, 219 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 220 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 221 "FORMATDATETIME": _build_date_format, 222 "JSONEXTRACTSTRING": build_json_extract_path( 223 exp.JSONExtractScalar, zero_based_indexing=False 224 ), 225 "MAP": parser.build_var_map, 226 "MATCH": exp.RegexpLike.from_arg_list, 227 "RANDCANONICAL": exp.Rand.from_arg_list, 228 "STR_TO_DATE": _build_str_to_date, 229 "TUPLE": exp.Struct.from_arg_list, 230 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 231 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 233 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "UNIQ": exp.ApproxDistinct.from_arg_list, 235 "XOR": lambda args: exp.Xor(expressions=args), 236 "MD5": exp.MD5Digest.from_arg_list, 237 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 238 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 239 } 240 241 AGG_FUNCTIONS = { 242 "count", 243 "min", 244 "max", 245 "sum", 246 "avg", 247 "any", 248 "stddevPop", 249 "stddevSamp", 250 "varPop", 251 "varSamp", 252 "corr", 253 "covarPop", 254 "covarSamp", 255 "entropy", 256 "exponentialMovingAverage", 257 "intervalLengthSum", 258 "kolmogorovSmirnovTest", 259 "mannWhitneyUTest", 260 "median", 261 "rankCorr", 262 "sumKahan", 263 "studentTTest", 264 "welchTTest", 265 "anyHeavy", 266 "anyLast", 267 "boundingRatio", 268 "first_value", 269 "last_value", 270 "argMin", 271 "argMax", 272 "avgWeighted", 273 "topK", 274 "topKWeighted", 275 "deltaSum", 276 "deltaSumTimestamp", 277 "groupArray", 278 "groupArrayLast", 279 "groupUniqArray", 280 "groupArrayInsertAt", 281 "groupArrayMovingAvg", 282 "groupArrayMovingSum", 283 "groupArraySample", 284 "groupBitAnd", 285 "groupBitOr", 286 "groupBitXor", 287 "groupBitmap", 288 "groupBitmapAnd", 289 "groupBitmapOr", 290 "groupBitmapXor", 291 "sumWithOverflow", 292 "sumMap", 293 "minMap", 294 "maxMap", 295 "skewSamp", 296 "skewPop", 297 "kurtSamp", 298 "kurtPop", 299 "uniq", 300 "uniqExact", 301 "uniqCombined", 302 "uniqCombined64", 303 "uniqHLL12", 304 "uniqTheta", 305 "quantile", 306 "quantiles", 307 "quantileExact", 308 "quantilesExact", 309 "quantileExactLow", 310 "quantilesExactLow", 311 "quantileExactHigh", 312 "quantilesExactHigh", 313 "quantileExactWeighted", 314 "quantilesExactWeighted", 315 "quantileTiming", 316 "quantilesTiming", 317 "quantileTimingWeighted", 318 "quantilesTimingWeighted", 319 "quantileDeterministic", 320 "quantilesDeterministic", 321 "quantileTDigest", 322 "quantilesTDigest", 323 "quantileTDigestWeighted", 324 "quantilesTDigestWeighted", 325 "quantileBFloat16", 326 "quantilesBFloat16", 327 "quantileBFloat16Weighted", 328 "quantilesBFloat16Weighted", 329 "simpleLinearRegression", 330 "stochasticLinearRegression", 331 "stochasticLogisticRegression", 332 "categoricalInformationValue", 333 "contingency", 334 "cramersV", 335 "cramersVBiasCorrected", 336 "theilsU", 337 "maxIntersections", 338 "maxIntersectionsPosition", 339 "meanZTest", 340 "quantileInterpolatedWeighted", 341 "quantilesInterpolatedWeighted", 342 "quantileGK", 343 "quantilesGK", 344 "sparkBar", 345 "sumCount", 346 "largestTriangleThreeBuckets", 347 "histogram", 348 "sequenceMatch", 349 "sequenceCount", 350 "windowFunnel", 351 "retention", 352 "uniqUpTo", 353 "sequenceNextNode", 354 "exponentialTimeDecayedAvg", 355 } 356 357 AGG_FUNCTIONS_SUFFIXES = [ 358 "If", 359 "Array", 360 "ArrayIf", 361 "Map", 362 "SimpleState", 363 "State", 364 "Merge", 365 "MergeState", 366 "ForEach", 367 "Distinct", 368 "OrDefault", 369 "OrNull", 370 "Resample", 371 "ArgMin", 372 "ArgMax", 373 ] 374 375 FUNC_TOKENS = { 376 *parser.Parser.FUNC_TOKENS, 377 TokenType.SET, 378 } 379 380 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 381 382 ID_VAR_TOKENS = { 383 *parser.Parser.ID_VAR_TOKENS, 384 TokenType.LIKE, 385 } 386 387 AGG_FUNC_MAPPING = ( 388 lambda functions, suffixes: { 389 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 390 } 391 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 392 393 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 394 395 FUNCTION_PARSERS = { 396 **parser.Parser.FUNCTION_PARSERS, 397 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 398 "QUANTILE": lambda self: self._parse_quantile(), 399 } 400 401 FUNCTION_PARSERS.pop("MATCH") 402 403 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 404 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 405 406 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 407 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 408 409 RANGE_PARSERS = { 410 **parser.Parser.RANGE_PARSERS, 411 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 412 and self._parse_in(this, is_global=True), 413 } 414 415 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 416 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 417 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 418 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 419 420 JOIN_KINDS = { 421 *parser.Parser.JOIN_KINDS, 422 TokenType.ANY, 423 TokenType.ASOF, 424 TokenType.ARRAY, 425 } 426 427 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 428 TokenType.ANY, 429 TokenType.ARRAY, 430 TokenType.FINAL, 431 TokenType.FORMAT, 432 TokenType.SETTINGS, 433 } 434 435 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 436 TokenType.FORMAT, 437 } 438 439 LOG_DEFAULTS_TO_LN = True 440 441 QUERY_MODIFIER_PARSERS = { 442 **parser.Parser.QUERY_MODIFIER_PARSERS, 443 TokenType.SETTINGS: lambda self: ( 444 "settings", 445 self._advance() or self._parse_csv(self._parse_assignment), 446 ), 447 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 448 } 449 450 CONSTRAINT_PARSERS = { 451 **parser.Parser.CONSTRAINT_PARSERS, 452 "INDEX": lambda self: self._parse_index_constraint(), 453 "CODEC": lambda self: self._parse_compress(), 454 } 455 456 ALTER_PARSERS = { 457 **parser.Parser.ALTER_PARSERS, 458 "REPLACE": lambda self: self._parse_alter_table_replace(), 459 } 460 461 SCHEMA_UNNAMED_CONSTRAINTS = { 462 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 463 "INDEX", 464 } 465 466 PLACEHOLDER_PARSERS = { 467 **parser.Parser.PLACEHOLDER_PARSERS, 468 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 469 } 470 471 def _parse_types( 472 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 473 ) -> t.Optional[exp.Expression]: 474 dtype = super()._parse_types( 475 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 476 ) 477 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 478 # Mark every type as non-nullable which is ClickHouse's default, unless it's 479 # already marked as nullable. This marker helps us transpile types from other 480 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 481 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 482 # fail in ClickHouse without the `Nullable` type constructor. 483 dtype.set("nullable", False) 484 485 return dtype 486 487 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 488 index = self._index 489 this = self._parse_bitwise() 490 if self._match(TokenType.FROM): 491 self._retreat(index) 492 return super()._parse_extract() 493 494 # We return Anonymous here because extract and regexpExtract have different semantics, 495 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 496 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 497 # 498 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 499 self._match(TokenType.COMMA) 500 return self.expression( 501 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 502 ) 503 504 def _parse_assignment(self) -> t.Optional[exp.Expression]: 505 this = super()._parse_assignment() 506 507 if self._match(TokenType.PLACEHOLDER): 508 return self.expression( 509 exp.If, 510 this=this, 511 true=self._parse_assignment(), 512 false=self._match(TokenType.COLON) and self._parse_assignment(), 513 ) 514 515 return this 516 517 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 518 """ 519 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 520 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 521 """ 522 this = self._parse_id_var() 523 self._match(TokenType.COLON) 524 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 525 self._match_text_seq("IDENTIFIER") and "Identifier" 526 ) 527 528 if not kind: 529 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 530 elif not self._match(TokenType.R_BRACE): 531 self.raise_error("Expecting }") 532 533 return self.expression(exp.Placeholder, this=this, kind=kind) 534 535 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 536 this = super()._parse_in(this) 537 this.set("is_global", is_global) 538 return this 539 540 def _parse_table( 541 self, 542 schema: bool = False, 543 joins: bool = False, 544 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 545 parse_bracket: bool = False, 546 is_db_reference: bool = False, 547 parse_partition: bool = False, 548 ) -> t.Optional[exp.Expression]: 549 this = super()._parse_table( 550 schema=schema, 551 joins=joins, 552 alias_tokens=alias_tokens, 553 parse_bracket=parse_bracket, 554 is_db_reference=is_db_reference, 555 ) 556 557 if self._match(TokenType.FINAL): 558 this = self.expression(exp.Final, this=this) 559 560 return this 561 562 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 563 return super()._parse_position(haystack_first=True) 564 565 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 566 def _parse_cte(self) -> exp.CTE: 567 # WITH <identifier> AS <subquery expression> 568 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 569 570 if not cte: 571 # WITH <expression> AS <identifier> 572 cte = self.expression( 573 exp.CTE, 574 this=self._parse_assignment(), 575 alias=self._parse_table_alias(), 576 scalar=True, 577 ) 578 579 return cte 580 581 def _parse_join_parts( 582 self, 583 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 584 is_global = self._match(TokenType.GLOBAL) and self._prev 585 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 586 587 if kind_pre: 588 kind = self._match_set(self.JOIN_KINDS) and self._prev 589 side = self._match_set(self.JOIN_SIDES) and self._prev 590 return is_global, side, kind 591 592 return ( 593 is_global, 594 self._match_set(self.JOIN_SIDES) and self._prev, 595 self._match_set(self.JOIN_KINDS) and self._prev, 596 ) 597 598 def _parse_join( 599 self, skip_join_token: bool = False, parse_bracket: bool = False 600 ) -> t.Optional[exp.Join]: 601 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 602 if join: 603 join.set("global", join.args.pop("method", None)) 604 605 return join 606 607 def _parse_function( 608 self, 609 functions: t.Optional[t.Dict[str, t.Callable]] = None, 610 anonymous: bool = False, 611 optional_parens: bool = True, 612 any_token: bool = False, 613 ) -> t.Optional[exp.Expression]: 614 expr = super()._parse_function( 615 functions=functions, 616 anonymous=anonymous, 617 optional_parens=optional_parens, 618 any_token=any_token, 619 ) 620 621 func = expr.this if isinstance(expr, exp.Window) else expr 622 623 # Aggregate functions can be split in 2 parts: <func_name><suffix> 624 parts = ( 625 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 626 ) 627 628 if parts: 629 params = self._parse_func_params(func) 630 631 kwargs = { 632 "this": func.this, 633 "expressions": func.expressions, 634 } 635 if parts[1]: 636 kwargs["parts"] = parts 637 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 638 else: 639 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 640 641 kwargs["exp_class"] = exp_class 642 if params: 643 kwargs["params"] = params 644 645 func = self.expression(**kwargs) 646 647 if isinstance(expr, exp.Window): 648 # The window's func was parsed as Anonymous in base parser, fix its 649 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 650 expr.set("this", func) 651 elif params: 652 # Params have blocked super()._parse_function() from parsing the following window 653 # (if that exists) as they're standing between the function call and the window spec 654 expr = self._parse_window(func) 655 else: 656 expr = func 657 658 return expr 659 660 def _parse_func_params( 661 self, this: t.Optional[exp.Func] = None 662 ) -> t.Optional[t.List[exp.Expression]]: 663 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 664 return self._parse_csv(self._parse_lambda) 665 666 if self._match(TokenType.L_PAREN): 667 params = self._parse_csv(self._parse_lambda) 668 self._match_r_paren(this) 669 return params 670 671 return None 672 673 def _parse_quantile(self) -> exp.Quantile: 674 this = self._parse_lambda() 675 params = self._parse_func_params() 676 if params: 677 return self.expression(exp.Quantile, this=params[0], quantile=this) 678 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 679 680 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 681 return super()._parse_wrapped_id_vars(optional=True) 682 683 def _parse_primary_key( 684 self, wrapped_optional: bool = False, in_props: bool = False 685 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 686 return super()._parse_primary_key( 687 wrapped_optional=wrapped_optional or in_props, in_props=in_props 688 ) 689 690 def _parse_on_property(self) -> t.Optional[exp.Expression]: 691 index = self._index 692 if self._match_text_seq("CLUSTER"): 693 this = self._parse_id_var() 694 if this: 695 return self.expression(exp.OnCluster, this=this) 696 else: 697 self._retreat(index) 698 return None 699 700 def _parse_index_constraint( 701 self, kind: t.Optional[str] = None 702 ) -> exp.IndexColumnConstraint: 703 # INDEX name1 expr TYPE type1(args) GRANULARITY value 704 this = self._parse_id_var() 705 expression = self._parse_assignment() 706 707 index_type = self._match_text_seq("TYPE") and ( 708 self._parse_function() or self._parse_var() 709 ) 710 711 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 712 713 return self.expression( 714 exp.IndexColumnConstraint, 715 this=this, 716 expression=expression, 717 index_type=index_type, 718 granularity=granularity, 719 ) 720 721 def _parse_partition(self) -> t.Optional[exp.Partition]: 722 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 723 if not self._match(TokenType.PARTITION): 724 return None 725 726 if self._match_text_seq("ID"): 727 # Corresponds to the PARTITION ID <string_value> syntax 728 expressions: t.List[exp.Expression] = [ 729 self.expression(exp.PartitionId, this=self._parse_string()) 730 ] 731 else: 732 expressions = self._parse_expressions() 733 734 return self.expression(exp.Partition, expressions=expressions) 735 736 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 737 partition = self._parse_partition() 738 739 if not partition or not self._match(TokenType.FROM): 740 return None 741 742 return self.expression( 743 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 744 ) 745 746 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 747 if not self._match_text_seq("PROJECTION"): 748 return None 749 750 return self.expression( 751 exp.ProjectionDef, 752 this=self._parse_id_var(), 753 expression=self._parse_wrapped(self._parse_statement), 754 ) 755 756 def _parse_constraint(self) -> t.Optional[exp.Expression]: 757 return super()._parse_constraint() or self._parse_projection_def() 758 759 def _parse_alias( 760 self, this: t.Optional[exp.Expression], explicit: bool = False 761 ) -> t.Optional[exp.Expression]: 762 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 763 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 764 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 765 return this 766 767 return super()._parse_alias(this=this, explicit=explicit) 768 769 def _parse_expression(self) -> t.Optional[exp.Expression]: 770 this = super()._parse_expression() 771 772 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 773 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 774 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 775 self._match(TokenType.R_PAREN) 776 777 return this 778 779 class Generator(generator.Generator): 780 QUERY_HINTS = False 781 STRUCT_DELIMITER = ("(", ")") 782 NVL2_SUPPORTED = False 783 TABLESAMPLE_REQUIRES_PARENS = False 784 TABLESAMPLE_SIZE_IS_ROWS = False 785 TABLESAMPLE_KEYWORDS = "SAMPLE" 786 LAST_DAY_SUPPORTS_DATE_PART = False 787 CAN_IMPLEMENT_ARRAY_ANY = True 788 SUPPORTS_TO_NUMBER = False 789 JOIN_HINTS = False 790 TABLE_HINTS = False 791 GROUPINGS_SEP = "" 792 SET_OP_MODIFIERS = False 793 SUPPORTS_TABLE_ALIAS_COLUMNS = False 794 VALUES_AS_TABLE = False 795 796 STRING_TYPE_MAPPING = { 797 exp.DataType.Type.CHAR: "String", 798 exp.DataType.Type.LONGBLOB: "String", 799 exp.DataType.Type.LONGTEXT: "String", 800 exp.DataType.Type.MEDIUMBLOB: "String", 801 exp.DataType.Type.MEDIUMTEXT: "String", 802 exp.DataType.Type.TINYBLOB: "String", 803 exp.DataType.Type.TINYTEXT: "String", 804 exp.DataType.Type.TEXT: "String", 805 exp.DataType.Type.VARBINARY: "String", 806 exp.DataType.Type.VARCHAR: "String", 807 } 808 809 SUPPORTED_JSON_PATH_PARTS = { 810 exp.JSONPathKey, 811 exp.JSONPathRoot, 812 exp.JSONPathSubscript, 813 } 814 815 TYPE_MAPPING = { 816 **generator.Generator.TYPE_MAPPING, 817 **STRING_TYPE_MAPPING, 818 exp.DataType.Type.ARRAY: "Array", 819 exp.DataType.Type.BIGINT: "Int64", 820 exp.DataType.Type.DATE32: "Date32", 821 exp.DataType.Type.DATETIME: "DateTime", 822 exp.DataType.Type.DATETIME64: "DateTime64", 823 exp.DataType.Type.TIMESTAMP: "DateTime", 824 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 825 exp.DataType.Type.DOUBLE: "Float64", 826 exp.DataType.Type.ENUM: "Enum", 827 exp.DataType.Type.ENUM8: "Enum8", 828 exp.DataType.Type.ENUM16: "Enum16", 829 exp.DataType.Type.FIXEDSTRING: "FixedString", 830 exp.DataType.Type.FLOAT: "Float32", 831 exp.DataType.Type.INT: "Int32", 832 exp.DataType.Type.MEDIUMINT: "Int32", 833 exp.DataType.Type.INT128: "Int128", 834 exp.DataType.Type.INT256: "Int256", 835 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 836 exp.DataType.Type.MAP: "Map", 837 exp.DataType.Type.NESTED: "Nested", 838 exp.DataType.Type.SMALLINT: "Int16", 839 exp.DataType.Type.STRUCT: "Tuple", 840 exp.DataType.Type.TINYINT: "Int8", 841 exp.DataType.Type.UBIGINT: "UInt64", 842 exp.DataType.Type.UINT: "UInt32", 843 exp.DataType.Type.UINT128: "UInt128", 844 exp.DataType.Type.UINT256: "UInt256", 845 exp.DataType.Type.USMALLINT: "UInt16", 846 exp.DataType.Type.UTINYINT: "UInt8", 847 exp.DataType.Type.IPV4: "IPv4", 848 exp.DataType.Type.IPV6: "IPv6", 849 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 850 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 851 } 852 853 TRANSFORMS = { 854 **generator.Generator.TRANSFORMS, 855 exp.AnyValue: rename_func("any"), 856 exp.ApproxDistinct: rename_func("uniq"), 857 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 858 exp.ArraySize: rename_func("LENGTH"), 859 exp.ArraySum: rename_func("arraySum"), 860 exp.ArgMax: arg_max_or_min_no_count("argMax"), 861 exp.ArgMin: arg_max_or_min_no_count("argMin"), 862 exp.Array: inline_array_sql, 863 exp.CastToStrType: rename_func("CAST"), 864 exp.CountIf: rename_func("countIf"), 865 exp.CompressColumnConstraint: lambda self, 866 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 867 exp.ComputedColumnConstraint: lambda self, 868 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 869 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 870 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 871 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 872 exp.DateStrToDate: rename_func("toDate"), 873 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 874 exp.Explode: rename_func("arrayJoin"), 875 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 876 exp.IsNan: rename_func("isNaN"), 877 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 878 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 879 exp.JSONPathKey: json_path_key_only_name, 880 exp.JSONPathRoot: lambda *_: "", 881 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 882 exp.Nullif: rename_func("nullIf"), 883 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 884 exp.Pivot: no_pivot_sql, 885 exp.Quantile: _quantile_sql, 886 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 887 exp.Rand: rename_func("randCanonical"), 888 exp.StartsWith: rename_func("startsWith"), 889 exp.StrPosition: lambda self, e: self.func( 890 "position", e.this, e.args.get("substr"), e.args.get("position") 891 ), 892 exp.TimeToStr: lambda self, e: self.func( 893 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 894 ), 895 exp.TimeStrToTime: _timestrtotime_sql, 896 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 897 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 898 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 899 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 900 exp.MD5Digest: rename_func("MD5"), 901 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 902 exp.SHA: rename_func("SHA1"), 903 exp.SHA2: sha256_sql, 904 exp.UnixToTime: _unix_to_time_sql, 905 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 906 exp.Trim: trim_sql, 907 exp.Variance: rename_func("varSamp"), 908 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 909 exp.Stddev: rename_func("stddevSamp"), 910 exp.Chr: rename_func("CHAR"), 911 exp.Lag: lambda self, e: self.func( 912 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 913 ), 914 exp.Lead: lambda self, e: self.func( 915 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 916 ), 917 } 918 919 PROPERTIES_LOCATION = { 920 **generator.Generator.PROPERTIES_LOCATION, 921 exp.OnCluster: exp.Properties.Location.POST_NAME, 922 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 923 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 924 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 925 } 926 927 # There's no list in docs, but it can be found in Clickhouse code 928 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 929 ON_CLUSTER_TARGETS = { 930 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 931 "DATABASE", 932 "TABLE", 933 "VIEW", 934 "DICTIONARY", 935 "INDEX", 936 "FUNCTION", 937 "NAMED COLLECTION", 938 } 939 940 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 941 NON_NULLABLE_TYPES = { 942 exp.DataType.Type.ARRAY, 943 exp.DataType.Type.MAP, 944 exp.DataType.Type.STRUCT, 945 } 946 947 def strtodate_sql(self, expression: exp.StrToDate) -> str: 948 strtodate_sql = self.function_fallback_sql(expression) 949 950 if not isinstance(expression.parent, exp.Cast): 951 # StrToDate returns DATEs in other dialects (eg. postgres), so 952 # this branch aims to improve the transpilation to clickhouse 953 return f"CAST({strtodate_sql} AS DATE)" 954 955 return strtodate_sql 956 957 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 958 this = expression.this 959 960 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 961 return self.sql(this) 962 963 return super().cast_sql(expression, safe_prefix=safe_prefix) 964 965 def trycast_sql(self, expression: exp.TryCast) -> str: 966 dtype = expression.to 967 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 968 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 969 dtype.set("nullable", True) 970 971 return super().cast_sql(expression) 972 973 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 974 this = self.json_path_part(expression.this) 975 return str(int(this) + 1) if is_int(this) else this 976 977 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 978 return f"AS {self.sql(expression, 'this')}" 979 980 def _any_to_has( 981 self, 982 expression: exp.EQ | exp.NEQ, 983 default: t.Callable[[t.Any], str], 984 prefix: str = "", 985 ) -> str: 986 if isinstance(expression.left, exp.Any): 987 arr = expression.left 988 this = expression.right 989 elif isinstance(expression.right, exp.Any): 990 arr = expression.right 991 this = expression.left 992 else: 993 return default(expression) 994 995 return prefix + self.func("has", arr.this.unnest(), this) 996 997 def eq_sql(self, expression: exp.EQ) -> str: 998 return self._any_to_has(expression, super().eq_sql) 999 1000 def neq_sql(self, expression: exp.NEQ) -> str: 1001 return self._any_to_has(expression, super().neq_sql, "NOT ") 1002 1003 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1004 # Manually add a flag to make the search case-insensitive 1005 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1006 return self.func("match", expression.this, regex) 1007 1008 def datatype_sql(self, expression: exp.DataType) -> str: 1009 # String is the standard ClickHouse type, every other variant is just an alias. 1010 # Additionally, any supplied length parameter will be ignored. 1011 # 1012 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1013 if expression.this in self.STRING_TYPE_MAPPING: 1014 dtype = "String" 1015 else: 1016 dtype = super().datatype_sql(expression) 1017 1018 # This section changes the type to `Nullable(...)` if the following conditions hold: 1019 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1020 # and change their semantics 1021 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1022 # constraint: "Type of Map key must be a type, that can be represented by integer or 1023 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1024 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1025 parent = expression.parent 1026 nullable = expression.args.get("nullable") 1027 if nullable is True or ( 1028 nullable is None 1029 and not ( 1030 isinstance(parent, exp.DataType) 1031 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1032 and expression.index in (None, 0) 1033 ) 1034 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1035 ): 1036 dtype = f"Nullable({dtype})" 1037 1038 return dtype 1039 1040 def cte_sql(self, expression: exp.CTE) -> str: 1041 if expression.args.get("scalar"): 1042 this = self.sql(expression, "this") 1043 alias = self.sql(expression, "alias") 1044 return f"{this} AS {alias}" 1045 1046 return super().cte_sql(expression) 1047 1048 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1049 return super().after_limit_modifiers(expression) + [ 1050 ( 1051 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1052 if expression.args.get("settings") 1053 else "" 1054 ), 1055 ( 1056 self.seg("FORMAT ") + self.sql(expression, "format") 1057 if expression.args.get("format") 1058 else "" 1059 ), 1060 ] 1061 1062 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1063 params = self.expressions(expression, key="params", flat=True) 1064 return self.func(expression.name, *expression.expressions) + f"({params})" 1065 1066 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1067 return self.func(expression.name, *expression.expressions) 1068 1069 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1070 return self.anonymousaggfunc_sql(expression) 1071 1072 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1073 return self.parameterizedagg_sql(expression) 1074 1075 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1076 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1077 1078 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1079 return f"ON CLUSTER {self.sql(expression, 'this')}" 1080 1081 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1082 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1083 exp.Properties.Location.POST_NAME 1084 ): 1085 this_name = self.sql( 1086 expression.this if isinstance(expression.this, exp.Schema) else expression, 1087 "this", 1088 ) 1089 this_properties = " ".join( 1090 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1091 ) 1092 this_schema = self.schema_columns_sql(expression.this) 1093 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1094 1095 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1096 1097 return super().createable_sql(expression, locations) 1098 1099 def create_sql(self, expression: exp.Create) -> str: 1100 # The comment property comes last in CTAS statements, i.e. after the query 1101 query = expression.expression 1102 if isinstance(query, exp.Query): 1103 comment_prop = expression.find(exp.SchemaCommentProperty) 1104 if comment_prop: 1105 comment_prop.pop() 1106 query.replace(exp.paren(query)) 1107 else: 1108 comment_prop = None 1109 1110 create_sql = super().create_sql(expression) 1111 1112 comment_sql = self.sql(comment_prop) 1113 comment_sql = f" {comment_sql}" if comment_sql else "" 1114 1115 return f"{create_sql}{comment_sql}" 1116 1117 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1118 this = self.indent(self.sql(expression, "this")) 1119 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1120 1121 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1122 this = self.sql(expression, "this") 1123 this = f" {this}" if this else "" 1124 expr = self.sql(expression, "expression") 1125 expr = f" {expr}" if expr else "" 1126 index_type = self.sql(expression, "index_type") 1127 index_type = f" TYPE {index_type}" if index_type else "" 1128 granularity = self.sql(expression, "granularity") 1129 granularity = f" GRANULARITY {granularity}" if granularity else "" 1130 1131 return f"INDEX{this}{expr}{index_type}{granularity}" 1132 1133 def partition_sql(self, expression: exp.Partition) -> str: 1134 return f"PARTITION {self.expressions(expression, flat=True)}" 1135 1136 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1137 return f"ID {self.sql(expression.this)}" 1138 1139 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1140 return ( 1141 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1142 ) 1143 1144 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1145 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
133class ClickHouse(Dialect): 134 NORMALIZE_FUNCTIONS: bool | str = False 135 NULL_ORDERING = "nulls_are_last" 136 SUPPORTS_USER_DEFINED_TYPES = False 137 SAFE_DIVISION = True 138 LOG_BASE_FIRST: t.Optional[bool] = None 139 FORCE_EARLY_ALIAS_REF_EXPANSION = True 140 141 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 142 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 143 144 UNESCAPED_SEQUENCES = { 145 "\\0": "\0", 146 } 147 148 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 149 150 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 151 exp.Except: False, 152 exp.Intersect: False, 153 exp.Union: None, 154 } 155 156 class Tokenizer(tokens.Tokenizer): 157 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 158 IDENTIFIERS = ['"', "`"] 159 STRING_ESCAPES = ["'", "\\"] 160 BIT_STRINGS = [("0b", "")] 161 HEX_STRINGS = [("0x", ""), ("0X", "")] 162 HEREDOC_STRINGS = ["$"] 163 164 KEYWORDS = { 165 **tokens.Tokenizer.KEYWORDS, 166 "ATTACH": TokenType.COMMAND, 167 "DATE32": TokenType.DATE32, 168 "DATETIME64": TokenType.DATETIME64, 169 "DICTIONARY": TokenType.DICTIONARY, 170 "ENUM8": TokenType.ENUM8, 171 "ENUM16": TokenType.ENUM16, 172 "FINAL": TokenType.FINAL, 173 "FIXEDSTRING": TokenType.FIXEDSTRING, 174 "FLOAT32": TokenType.FLOAT, 175 "FLOAT64": TokenType.DOUBLE, 176 "GLOBAL": TokenType.GLOBAL, 177 "INT256": TokenType.INT256, 178 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 179 "MAP": TokenType.MAP, 180 "NESTED": TokenType.NESTED, 181 "SAMPLE": TokenType.TABLE_SAMPLE, 182 "TUPLE": TokenType.STRUCT, 183 "UINT128": TokenType.UINT128, 184 "UINT16": TokenType.USMALLINT, 185 "UINT256": TokenType.UINT256, 186 "UINT32": TokenType.UINT, 187 "UINT64": TokenType.UBIGINT, 188 "UINT8": TokenType.UTINYINT, 189 "IPV4": TokenType.IPV4, 190 "IPV6": TokenType.IPV6, 191 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 192 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 193 "SYSTEM": TokenType.COMMAND, 194 "PREWHERE": TokenType.PREWHERE, 195 } 196 KEYWORDS.pop("/*+") 197 198 SINGLE_TOKENS = { 199 **tokens.Tokenizer.SINGLE_TOKENS, 200 "$": TokenType.HEREDOC_STRING, 201 } 202 203 class Parser(parser.Parser): 204 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 205 # * select x from t1 union all select x from t2 limit 1; 206 # * select x from t1 union all (select x from t2 limit 1); 207 MODIFIERS_ATTACHED_TO_SET_OP = False 208 INTERVAL_SPANS = False 209 210 FUNCTIONS = { 211 **parser.Parser.FUNCTIONS, 212 "ANY": exp.AnyValue.from_arg_list, 213 "ARRAYSUM": exp.ArraySum.from_arg_list, 214 "COUNTIF": _build_count_if, 215 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 217 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 219 "DATE_FORMAT": _build_date_format, 220 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 221 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 222 "FORMATDATETIME": _build_date_format, 223 "JSONEXTRACTSTRING": build_json_extract_path( 224 exp.JSONExtractScalar, zero_based_indexing=False 225 ), 226 "MAP": parser.build_var_map, 227 "MATCH": exp.RegexpLike.from_arg_list, 228 "RANDCANONICAL": exp.Rand.from_arg_list, 229 "STR_TO_DATE": _build_str_to_date, 230 "TUPLE": exp.Struct.from_arg_list, 231 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 233 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 235 "UNIQ": exp.ApproxDistinct.from_arg_list, 236 "XOR": lambda args: exp.Xor(expressions=args), 237 "MD5": exp.MD5Digest.from_arg_list, 238 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 239 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 240 } 241 242 AGG_FUNCTIONS = { 243 "count", 244 "min", 245 "max", 246 "sum", 247 "avg", 248 "any", 249 "stddevPop", 250 "stddevSamp", 251 "varPop", 252 "varSamp", 253 "corr", 254 "covarPop", 255 "covarSamp", 256 "entropy", 257 "exponentialMovingAverage", 258 "intervalLengthSum", 259 "kolmogorovSmirnovTest", 260 "mannWhitneyUTest", 261 "median", 262 "rankCorr", 263 "sumKahan", 264 "studentTTest", 265 "welchTTest", 266 "anyHeavy", 267 "anyLast", 268 "boundingRatio", 269 "first_value", 270 "last_value", 271 "argMin", 272 "argMax", 273 "avgWeighted", 274 "topK", 275 "topKWeighted", 276 "deltaSum", 277 "deltaSumTimestamp", 278 "groupArray", 279 "groupArrayLast", 280 "groupUniqArray", 281 "groupArrayInsertAt", 282 "groupArrayMovingAvg", 283 "groupArrayMovingSum", 284 "groupArraySample", 285 "groupBitAnd", 286 "groupBitOr", 287 "groupBitXor", 288 "groupBitmap", 289 "groupBitmapAnd", 290 "groupBitmapOr", 291 "groupBitmapXor", 292 "sumWithOverflow", 293 "sumMap", 294 "minMap", 295 "maxMap", 296 "skewSamp", 297 "skewPop", 298 "kurtSamp", 299 "kurtPop", 300 "uniq", 301 "uniqExact", 302 "uniqCombined", 303 "uniqCombined64", 304 "uniqHLL12", 305 "uniqTheta", 306 "quantile", 307 "quantiles", 308 "quantileExact", 309 "quantilesExact", 310 "quantileExactLow", 311 "quantilesExactLow", 312 "quantileExactHigh", 313 "quantilesExactHigh", 314 "quantileExactWeighted", 315 "quantilesExactWeighted", 316 "quantileTiming", 317 "quantilesTiming", 318 "quantileTimingWeighted", 319 "quantilesTimingWeighted", 320 "quantileDeterministic", 321 "quantilesDeterministic", 322 "quantileTDigest", 323 "quantilesTDigest", 324 "quantileTDigestWeighted", 325 "quantilesTDigestWeighted", 326 "quantileBFloat16", 327 "quantilesBFloat16", 328 "quantileBFloat16Weighted", 329 "quantilesBFloat16Weighted", 330 "simpleLinearRegression", 331 "stochasticLinearRegression", 332 "stochasticLogisticRegression", 333 "categoricalInformationValue", 334 "contingency", 335 "cramersV", 336 "cramersVBiasCorrected", 337 "theilsU", 338 "maxIntersections", 339 "maxIntersectionsPosition", 340 "meanZTest", 341 "quantileInterpolatedWeighted", 342 "quantilesInterpolatedWeighted", 343 "quantileGK", 344 "quantilesGK", 345 "sparkBar", 346 "sumCount", 347 "largestTriangleThreeBuckets", 348 "histogram", 349 "sequenceMatch", 350 "sequenceCount", 351 "windowFunnel", 352 "retention", 353 "uniqUpTo", 354 "sequenceNextNode", 355 "exponentialTimeDecayedAvg", 356 } 357 358 AGG_FUNCTIONS_SUFFIXES = [ 359 "If", 360 "Array", 361 "ArrayIf", 362 "Map", 363 "SimpleState", 364 "State", 365 "Merge", 366 "MergeState", 367 "ForEach", 368 "Distinct", 369 "OrDefault", 370 "OrNull", 371 "Resample", 372 "ArgMin", 373 "ArgMax", 374 ] 375 376 FUNC_TOKENS = { 377 *parser.Parser.FUNC_TOKENS, 378 TokenType.SET, 379 } 380 381 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 382 383 ID_VAR_TOKENS = { 384 *parser.Parser.ID_VAR_TOKENS, 385 TokenType.LIKE, 386 } 387 388 AGG_FUNC_MAPPING = ( 389 lambda functions, suffixes: { 390 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 391 } 392 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 393 394 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 395 396 FUNCTION_PARSERS = { 397 **parser.Parser.FUNCTION_PARSERS, 398 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 399 "QUANTILE": lambda self: self._parse_quantile(), 400 } 401 402 FUNCTION_PARSERS.pop("MATCH") 403 404 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 405 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 406 407 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 408 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 409 410 RANGE_PARSERS = { 411 **parser.Parser.RANGE_PARSERS, 412 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 413 and self._parse_in(this, is_global=True), 414 } 415 416 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 417 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 418 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 419 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 420 421 JOIN_KINDS = { 422 *parser.Parser.JOIN_KINDS, 423 TokenType.ANY, 424 TokenType.ASOF, 425 TokenType.ARRAY, 426 } 427 428 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 429 TokenType.ANY, 430 TokenType.ARRAY, 431 TokenType.FINAL, 432 TokenType.FORMAT, 433 TokenType.SETTINGS, 434 } 435 436 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 437 TokenType.FORMAT, 438 } 439 440 LOG_DEFAULTS_TO_LN = True 441 442 QUERY_MODIFIER_PARSERS = { 443 **parser.Parser.QUERY_MODIFIER_PARSERS, 444 TokenType.SETTINGS: lambda self: ( 445 "settings", 446 self._advance() or self._parse_csv(self._parse_assignment), 447 ), 448 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 449 } 450 451 CONSTRAINT_PARSERS = { 452 **parser.Parser.CONSTRAINT_PARSERS, 453 "INDEX": lambda self: self._parse_index_constraint(), 454 "CODEC": lambda self: self._parse_compress(), 455 } 456 457 ALTER_PARSERS = { 458 **parser.Parser.ALTER_PARSERS, 459 "REPLACE": lambda self: self._parse_alter_table_replace(), 460 } 461 462 SCHEMA_UNNAMED_CONSTRAINTS = { 463 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 464 "INDEX", 465 } 466 467 PLACEHOLDER_PARSERS = { 468 **parser.Parser.PLACEHOLDER_PARSERS, 469 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 470 } 471 472 def _parse_types( 473 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 474 ) -> t.Optional[exp.Expression]: 475 dtype = super()._parse_types( 476 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 477 ) 478 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 479 # Mark every type as non-nullable which is ClickHouse's default, unless it's 480 # already marked as nullable. This marker helps us transpile types from other 481 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 482 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 483 # fail in ClickHouse without the `Nullable` type constructor. 484 dtype.set("nullable", False) 485 486 return dtype 487 488 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 489 index = self._index 490 this = self._parse_bitwise() 491 if self._match(TokenType.FROM): 492 self._retreat(index) 493 return super()._parse_extract() 494 495 # We return Anonymous here because extract and regexpExtract have different semantics, 496 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 497 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 498 # 499 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 500 self._match(TokenType.COMMA) 501 return self.expression( 502 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 503 ) 504 505 def _parse_assignment(self) -> t.Optional[exp.Expression]: 506 this = super()._parse_assignment() 507 508 if self._match(TokenType.PLACEHOLDER): 509 return self.expression( 510 exp.If, 511 this=this, 512 true=self._parse_assignment(), 513 false=self._match(TokenType.COLON) and self._parse_assignment(), 514 ) 515 516 return this 517 518 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 519 """ 520 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 521 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 522 """ 523 this = self._parse_id_var() 524 self._match(TokenType.COLON) 525 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 526 self._match_text_seq("IDENTIFIER") and "Identifier" 527 ) 528 529 if not kind: 530 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 531 elif not self._match(TokenType.R_BRACE): 532 self.raise_error("Expecting }") 533 534 return self.expression(exp.Placeholder, this=this, kind=kind) 535 536 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 537 this = super()._parse_in(this) 538 this.set("is_global", is_global) 539 return this 540 541 def _parse_table( 542 self, 543 schema: bool = False, 544 joins: bool = False, 545 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 546 parse_bracket: bool = False, 547 is_db_reference: bool = False, 548 parse_partition: bool = False, 549 ) -> t.Optional[exp.Expression]: 550 this = super()._parse_table( 551 schema=schema, 552 joins=joins, 553 alias_tokens=alias_tokens, 554 parse_bracket=parse_bracket, 555 is_db_reference=is_db_reference, 556 ) 557 558 if self._match(TokenType.FINAL): 559 this = self.expression(exp.Final, this=this) 560 561 return this 562 563 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 564 return super()._parse_position(haystack_first=True) 565 566 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 567 def _parse_cte(self) -> exp.CTE: 568 # WITH <identifier> AS <subquery expression> 569 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 570 571 if not cte: 572 # WITH <expression> AS <identifier> 573 cte = self.expression( 574 exp.CTE, 575 this=self._parse_assignment(), 576 alias=self._parse_table_alias(), 577 scalar=True, 578 ) 579 580 return cte 581 582 def _parse_join_parts( 583 self, 584 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 585 is_global = self._match(TokenType.GLOBAL) and self._prev 586 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 587 588 if kind_pre: 589 kind = self._match_set(self.JOIN_KINDS) and self._prev 590 side = self._match_set(self.JOIN_SIDES) and self._prev 591 return is_global, side, kind 592 593 return ( 594 is_global, 595 self._match_set(self.JOIN_SIDES) and self._prev, 596 self._match_set(self.JOIN_KINDS) and self._prev, 597 ) 598 599 def _parse_join( 600 self, skip_join_token: bool = False, parse_bracket: bool = False 601 ) -> t.Optional[exp.Join]: 602 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 603 if join: 604 join.set("global", join.args.pop("method", None)) 605 606 return join 607 608 def _parse_function( 609 self, 610 functions: t.Optional[t.Dict[str, t.Callable]] = None, 611 anonymous: bool = False, 612 optional_parens: bool = True, 613 any_token: bool = False, 614 ) -> t.Optional[exp.Expression]: 615 expr = super()._parse_function( 616 functions=functions, 617 anonymous=anonymous, 618 optional_parens=optional_parens, 619 any_token=any_token, 620 ) 621 622 func = expr.this if isinstance(expr, exp.Window) else expr 623 624 # Aggregate functions can be split in 2 parts: <func_name><suffix> 625 parts = ( 626 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 627 ) 628 629 if parts: 630 params = self._parse_func_params(func) 631 632 kwargs = { 633 "this": func.this, 634 "expressions": func.expressions, 635 } 636 if parts[1]: 637 kwargs["parts"] = parts 638 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 639 else: 640 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 641 642 kwargs["exp_class"] = exp_class 643 if params: 644 kwargs["params"] = params 645 646 func = self.expression(**kwargs) 647 648 if isinstance(expr, exp.Window): 649 # The window's func was parsed as Anonymous in base parser, fix its 650 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 651 expr.set("this", func) 652 elif params: 653 # Params have blocked super()._parse_function() from parsing the following window 654 # (if that exists) as they're standing between the function call and the window spec 655 expr = self._parse_window(func) 656 else: 657 expr = func 658 659 return expr 660 661 def _parse_func_params( 662 self, this: t.Optional[exp.Func] = None 663 ) -> t.Optional[t.List[exp.Expression]]: 664 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 665 return self._parse_csv(self._parse_lambda) 666 667 if self._match(TokenType.L_PAREN): 668 params = self._parse_csv(self._parse_lambda) 669 self._match_r_paren(this) 670 return params 671 672 return None 673 674 def _parse_quantile(self) -> exp.Quantile: 675 this = self._parse_lambda() 676 params = self._parse_func_params() 677 if params: 678 return self.expression(exp.Quantile, this=params[0], quantile=this) 679 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 680 681 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 682 return super()._parse_wrapped_id_vars(optional=True) 683 684 def _parse_primary_key( 685 self, wrapped_optional: bool = False, in_props: bool = False 686 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 687 return super()._parse_primary_key( 688 wrapped_optional=wrapped_optional or in_props, in_props=in_props 689 ) 690 691 def _parse_on_property(self) -> t.Optional[exp.Expression]: 692 index = self._index 693 if self._match_text_seq("CLUSTER"): 694 this = self._parse_id_var() 695 if this: 696 return self.expression(exp.OnCluster, this=this) 697 else: 698 self._retreat(index) 699 return None 700 701 def _parse_index_constraint( 702 self, kind: t.Optional[str] = None 703 ) -> exp.IndexColumnConstraint: 704 # INDEX name1 expr TYPE type1(args) GRANULARITY value 705 this = self._parse_id_var() 706 expression = self._parse_assignment() 707 708 index_type = self._match_text_seq("TYPE") and ( 709 self._parse_function() or self._parse_var() 710 ) 711 712 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 713 714 return self.expression( 715 exp.IndexColumnConstraint, 716 this=this, 717 expression=expression, 718 index_type=index_type, 719 granularity=granularity, 720 ) 721 722 def _parse_partition(self) -> t.Optional[exp.Partition]: 723 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 724 if not self._match(TokenType.PARTITION): 725 return None 726 727 if self._match_text_seq("ID"): 728 # Corresponds to the PARTITION ID <string_value> syntax 729 expressions: t.List[exp.Expression] = [ 730 self.expression(exp.PartitionId, this=self._parse_string()) 731 ] 732 else: 733 expressions = self._parse_expressions() 734 735 return self.expression(exp.Partition, expressions=expressions) 736 737 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 738 partition = self._parse_partition() 739 740 if not partition or not self._match(TokenType.FROM): 741 return None 742 743 return self.expression( 744 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 745 ) 746 747 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 748 if not self._match_text_seq("PROJECTION"): 749 return None 750 751 return self.expression( 752 exp.ProjectionDef, 753 this=self._parse_id_var(), 754 expression=self._parse_wrapped(self._parse_statement), 755 ) 756 757 def _parse_constraint(self) -> t.Optional[exp.Expression]: 758 return super()._parse_constraint() or self._parse_projection_def() 759 760 def _parse_alias( 761 self, this: t.Optional[exp.Expression], explicit: bool = False 762 ) -> t.Optional[exp.Expression]: 763 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 764 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 765 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 766 return this 767 768 return super()._parse_alias(this=this, explicit=explicit) 769 770 def _parse_expression(self) -> t.Optional[exp.Expression]: 771 this = super()._parse_expression() 772 773 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 774 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 775 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 776 self._match(TokenType.R_PAREN) 777 778 return this 779 780 class Generator(generator.Generator): 781 QUERY_HINTS = False 782 STRUCT_DELIMITER = ("(", ")") 783 NVL2_SUPPORTED = False 784 TABLESAMPLE_REQUIRES_PARENS = False 785 TABLESAMPLE_SIZE_IS_ROWS = False 786 TABLESAMPLE_KEYWORDS = "SAMPLE" 787 LAST_DAY_SUPPORTS_DATE_PART = False 788 CAN_IMPLEMENT_ARRAY_ANY = True 789 SUPPORTS_TO_NUMBER = False 790 JOIN_HINTS = False 791 TABLE_HINTS = False 792 GROUPINGS_SEP = "" 793 SET_OP_MODIFIERS = False 794 SUPPORTS_TABLE_ALIAS_COLUMNS = False 795 VALUES_AS_TABLE = False 796 797 STRING_TYPE_MAPPING = { 798 exp.DataType.Type.CHAR: "String", 799 exp.DataType.Type.LONGBLOB: "String", 800 exp.DataType.Type.LONGTEXT: "String", 801 exp.DataType.Type.MEDIUMBLOB: "String", 802 exp.DataType.Type.MEDIUMTEXT: "String", 803 exp.DataType.Type.TINYBLOB: "String", 804 exp.DataType.Type.TINYTEXT: "String", 805 exp.DataType.Type.TEXT: "String", 806 exp.DataType.Type.VARBINARY: "String", 807 exp.DataType.Type.VARCHAR: "String", 808 } 809 810 SUPPORTED_JSON_PATH_PARTS = { 811 exp.JSONPathKey, 812 exp.JSONPathRoot, 813 exp.JSONPathSubscript, 814 } 815 816 TYPE_MAPPING = { 817 **generator.Generator.TYPE_MAPPING, 818 **STRING_TYPE_MAPPING, 819 exp.DataType.Type.ARRAY: "Array", 820 exp.DataType.Type.BIGINT: "Int64", 821 exp.DataType.Type.DATE32: "Date32", 822 exp.DataType.Type.DATETIME: "DateTime", 823 exp.DataType.Type.DATETIME64: "DateTime64", 824 exp.DataType.Type.TIMESTAMP: "DateTime", 825 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 826 exp.DataType.Type.DOUBLE: "Float64", 827 exp.DataType.Type.ENUM: "Enum", 828 exp.DataType.Type.ENUM8: "Enum8", 829 exp.DataType.Type.ENUM16: "Enum16", 830 exp.DataType.Type.FIXEDSTRING: "FixedString", 831 exp.DataType.Type.FLOAT: "Float32", 832 exp.DataType.Type.INT: "Int32", 833 exp.DataType.Type.MEDIUMINT: "Int32", 834 exp.DataType.Type.INT128: "Int128", 835 exp.DataType.Type.INT256: "Int256", 836 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 837 exp.DataType.Type.MAP: "Map", 838 exp.DataType.Type.NESTED: "Nested", 839 exp.DataType.Type.SMALLINT: "Int16", 840 exp.DataType.Type.STRUCT: "Tuple", 841 exp.DataType.Type.TINYINT: "Int8", 842 exp.DataType.Type.UBIGINT: "UInt64", 843 exp.DataType.Type.UINT: "UInt32", 844 exp.DataType.Type.UINT128: "UInt128", 845 exp.DataType.Type.UINT256: "UInt256", 846 exp.DataType.Type.USMALLINT: "UInt16", 847 exp.DataType.Type.UTINYINT: "UInt8", 848 exp.DataType.Type.IPV4: "IPv4", 849 exp.DataType.Type.IPV6: "IPv6", 850 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 851 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 852 } 853 854 TRANSFORMS = { 855 **generator.Generator.TRANSFORMS, 856 exp.AnyValue: rename_func("any"), 857 exp.ApproxDistinct: rename_func("uniq"), 858 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 859 exp.ArraySize: rename_func("LENGTH"), 860 exp.ArraySum: rename_func("arraySum"), 861 exp.ArgMax: arg_max_or_min_no_count("argMax"), 862 exp.ArgMin: arg_max_or_min_no_count("argMin"), 863 exp.Array: inline_array_sql, 864 exp.CastToStrType: rename_func("CAST"), 865 exp.CountIf: rename_func("countIf"), 866 exp.CompressColumnConstraint: lambda self, 867 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 868 exp.ComputedColumnConstraint: lambda self, 869 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 870 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 871 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 872 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 873 exp.DateStrToDate: rename_func("toDate"), 874 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 875 exp.Explode: rename_func("arrayJoin"), 876 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 877 exp.IsNan: rename_func("isNaN"), 878 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 879 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 880 exp.JSONPathKey: json_path_key_only_name, 881 exp.JSONPathRoot: lambda *_: "", 882 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 883 exp.Nullif: rename_func("nullIf"), 884 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 885 exp.Pivot: no_pivot_sql, 886 exp.Quantile: _quantile_sql, 887 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 888 exp.Rand: rename_func("randCanonical"), 889 exp.StartsWith: rename_func("startsWith"), 890 exp.StrPosition: lambda self, e: self.func( 891 "position", e.this, e.args.get("substr"), e.args.get("position") 892 ), 893 exp.TimeToStr: lambda self, e: self.func( 894 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 895 ), 896 exp.TimeStrToTime: _timestrtotime_sql, 897 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 898 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 899 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 900 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 901 exp.MD5Digest: rename_func("MD5"), 902 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 903 exp.SHA: rename_func("SHA1"), 904 exp.SHA2: sha256_sql, 905 exp.UnixToTime: _unix_to_time_sql, 906 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 907 exp.Trim: trim_sql, 908 exp.Variance: rename_func("varSamp"), 909 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 910 exp.Stddev: rename_func("stddevSamp"), 911 exp.Chr: rename_func("CHAR"), 912 exp.Lag: lambda self, e: self.func( 913 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 914 ), 915 exp.Lead: lambda self, e: self.func( 916 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 917 ), 918 } 919 920 PROPERTIES_LOCATION = { 921 **generator.Generator.PROPERTIES_LOCATION, 922 exp.OnCluster: exp.Properties.Location.POST_NAME, 923 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 924 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 925 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 926 } 927 928 # There's no list in docs, but it can be found in Clickhouse code 929 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 930 ON_CLUSTER_TARGETS = { 931 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 932 "DATABASE", 933 "TABLE", 934 "VIEW", 935 "DICTIONARY", 936 "INDEX", 937 "FUNCTION", 938 "NAMED COLLECTION", 939 } 940 941 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 942 NON_NULLABLE_TYPES = { 943 exp.DataType.Type.ARRAY, 944 exp.DataType.Type.MAP, 945 exp.DataType.Type.STRUCT, 946 } 947 948 def strtodate_sql(self, expression: exp.StrToDate) -> str: 949 strtodate_sql = self.function_fallback_sql(expression) 950 951 if not isinstance(expression.parent, exp.Cast): 952 # StrToDate returns DATEs in other dialects (eg. postgres), so 953 # this branch aims to improve the transpilation to clickhouse 954 return f"CAST({strtodate_sql} AS DATE)" 955 956 return strtodate_sql 957 958 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 959 this = expression.this 960 961 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 962 return self.sql(this) 963 964 return super().cast_sql(expression, safe_prefix=safe_prefix) 965 966 def trycast_sql(self, expression: exp.TryCast) -> str: 967 dtype = expression.to 968 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 969 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 970 dtype.set("nullable", True) 971 972 return super().cast_sql(expression) 973 974 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 975 this = self.json_path_part(expression.this) 976 return str(int(this) + 1) if is_int(this) else this 977 978 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 979 return f"AS {self.sql(expression, 'this')}" 980 981 def _any_to_has( 982 self, 983 expression: exp.EQ | exp.NEQ, 984 default: t.Callable[[t.Any], str], 985 prefix: str = "", 986 ) -> str: 987 if isinstance(expression.left, exp.Any): 988 arr = expression.left 989 this = expression.right 990 elif isinstance(expression.right, exp.Any): 991 arr = expression.right 992 this = expression.left 993 else: 994 return default(expression) 995 996 return prefix + self.func("has", arr.this.unnest(), this) 997 998 def eq_sql(self, expression: exp.EQ) -> str: 999 return self._any_to_has(expression, super().eq_sql) 1000 1001 def neq_sql(self, expression: exp.NEQ) -> str: 1002 return self._any_to_has(expression, super().neq_sql, "NOT ") 1003 1004 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1005 # Manually add a flag to make the search case-insensitive 1006 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1007 return self.func("match", expression.this, regex) 1008 1009 def datatype_sql(self, expression: exp.DataType) -> str: 1010 # String is the standard ClickHouse type, every other variant is just an alias. 1011 # Additionally, any supplied length parameter will be ignored. 1012 # 1013 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1014 if expression.this in self.STRING_TYPE_MAPPING: 1015 dtype = "String" 1016 else: 1017 dtype = super().datatype_sql(expression) 1018 1019 # This section changes the type to `Nullable(...)` if the following conditions hold: 1020 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1021 # and change their semantics 1022 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1023 # constraint: "Type of Map key must be a type, that can be represented by integer or 1024 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1025 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1026 parent = expression.parent 1027 nullable = expression.args.get("nullable") 1028 if nullable is True or ( 1029 nullable is None 1030 and not ( 1031 isinstance(parent, exp.DataType) 1032 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1033 and expression.index in (None, 0) 1034 ) 1035 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1036 ): 1037 dtype = f"Nullable({dtype})" 1038 1039 return dtype 1040 1041 def cte_sql(self, expression: exp.CTE) -> str: 1042 if expression.args.get("scalar"): 1043 this = self.sql(expression, "this") 1044 alias = self.sql(expression, "alias") 1045 return f"{this} AS {alias}" 1046 1047 return super().cte_sql(expression) 1048 1049 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1050 return super().after_limit_modifiers(expression) + [ 1051 ( 1052 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1053 if expression.args.get("settings") 1054 else "" 1055 ), 1056 ( 1057 self.seg("FORMAT ") + self.sql(expression, "format") 1058 if expression.args.get("format") 1059 else "" 1060 ), 1061 ] 1062 1063 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1064 params = self.expressions(expression, key="params", flat=True) 1065 return self.func(expression.name, *expression.expressions) + f"({params})" 1066 1067 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1068 return self.func(expression.name, *expression.expressions) 1069 1070 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1071 return self.anonymousaggfunc_sql(expression) 1072 1073 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1074 return self.parameterizedagg_sql(expression) 1075 1076 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1077 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1078 1079 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1080 return f"ON CLUSTER {self.sql(expression, 'this')}" 1081 1082 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1083 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1084 exp.Properties.Location.POST_NAME 1085 ): 1086 this_name = self.sql( 1087 expression.this if isinstance(expression.this, exp.Schema) else expression, 1088 "this", 1089 ) 1090 this_properties = " ".join( 1091 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1092 ) 1093 this_schema = self.schema_columns_sql(expression.this) 1094 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1095 1096 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1097 1098 return super().createable_sql(expression, locations) 1099 1100 def create_sql(self, expression: exp.Create) -> str: 1101 # The comment property comes last in CTAS statements, i.e. after the query 1102 query = expression.expression 1103 if isinstance(query, exp.Query): 1104 comment_prop = expression.find(exp.SchemaCommentProperty) 1105 if comment_prop: 1106 comment_prop.pop() 1107 query.replace(exp.paren(query)) 1108 else: 1109 comment_prop = None 1110 1111 create_sql = super().create_sql(expression) 1112 1113 comment_sql = self.sql(comment_prop) 1114 comment_sql = f" {comment_sql}" if comment_sql else "" 1115 1116 return f"{create_sql}{comment_sql}" 1117 1118 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1119 this = self.indent(self.sql(expression, "this")) 1120 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1121 1122 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1123 this = self.sql(expression, "this") 1124 this = f" {this}" if this else "" 1125 expr = self.sql(expression, "expression") 1126 expr = f" {expr}" if expr else "" 1127 index_type = self.sql(expression, "index_type") 1128 index_type = f" TYPE {index_type}" if index_type else "" 1129 granularity = self.sql(expression, "granularity") 1130 granularity = f" GRANULARITY {granularity}" if granularity else "" 1131 1132 return f"INDEX{this}{expr}{index_type}{granularity}" 1133 1134 def partition_sql(self, expression: exp.Partition) -> str: 1135 return f"PARTITION {self.expressions(expression, flat=True)}" 1136 1137 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1138 return f"ID {self.sql(expression.this)}" 1139 1140 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1141 return ( 1142 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1143 ) 1144 1145 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1146 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Whether the base comes first in the LOG
function.
Possible values: True
, False
, None
(two arguments are not supported by LOG
)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id, HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
Whether a set operation uses DISTINCT by default. This is None
when either DISTINCT
or ALL
must be explicitly specified.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
156 class Tokenizer(tokens.Tokenizer): 157 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 158 IDENTIFIERS = ['"', "`"] 159 STRING_ESCAPES = ["'", "\\"] 160 BIT_STRINGS = [("0b", "")] 161 HEX_STRINGS = [("0x", ""), ("0X", "")] 162 HEREDOC_STRINGS = ["$"] 163 164 KEYWORDS = { 165 **tokens.Tokenizer.KEYWORDS, 166 "ATTACH": TokenType.COMMAND, 167 "DATE32": TokenType.DATE32, 168 "DATETIME64": TokenType.DATETIME64, 169 "DICTIONARY": TokenType.DICTIONARY, 170 "ENUM8": TokenType.ENUM8, 171 "ENUM16": TokenType.ENUM16, 172 "FINAL": TokenType.FINAL, 173 "FIXEDSTRING": TokenType.FIXEDSTRING, 174 "FLOAT32": TokenType.FLOAT, 175 "FLOAT64": TokenType.DOUBLE, 176 "GLOBAL": TokenType.GLOBAL, 177 "INT256": TokenType.INT256, 178 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 179 "MAP": TokenType.MAP, 180 "NESTED": TokenType.NESTED, 181 "SAMPLE": TokenType.TABLE_SAMPLE, 182 "TUPLE": TokenType.STRUCT, 183 "UINT128": TokenType.UINT128, 184 "UINT16": TokenType.USMALLINT, 185 "UINT256": TokenType.UINT256, 186 "UINT32": TokenType.UINT, 187 "UINT64": TokenType.UBIGINT, 188 "UINT8": TokenType.UTINYINT, 189 "IPV4": TokenType.IPV4, 190 "IPV6": TokenType.IPV6, 191 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 192 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 193 "SYSTEM": TokenType.COMMAND, 194 "PREWHERE": TokenType.PREWHERE, 195 } 196 KEYWORDS.pop("/*+") 197 198 SINGLE_TOKENS = { 199 **tokens.Tokenizer.SINGLE_TOKENS, 200 "$": TokenType.HEREDOC_STRING, 201 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
203 class Parser(parser.Parser): 204 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 205 # * select x from t1 union all select x from t2 limit 1; 206 # * select x from t1 union all (select x from t2 limit 1); 207 MODIFIERS_ATTACHED_TO_SET_OP = False 208 INTERVAL_SPANS = False 209 210 FUNCTIONS = { 211 **parser.Parser.FUNCTIONS, 212 "ANY": exp.AnyValue.from_arg_list, 213 "ARRAYSUM": exp.ArraySum.from_arg_list, 214 "COUNTIF": _build_count_if, 215 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 217 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 219 "DATE_FORMAT": _build_date_format, 220 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 221 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 222 "FORMATDATETIME": _build_date_format, 223 "JSONEXTRACTSTRING": build_json_extract_path( 224 exp.JSONExtractScalar, zero_based_indexing=False 225 ), 226 "MAP": parser.build_var_map, 227 "MATCH": exp.RegexpLike.from_arg_list, 228 "RANDCANONICAL": exp.Rand.from_arg_list, 229 "STR_TO_DATE": _build_str_to_date, 230 "TUPLE": exp.Struct.from_arg_list, 231 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 233 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 235 "UNIQ": exp.ApproxDistinct.from_arg_list, 236 "XOR": lambda args: exp.Xor(expressions=args), 237 "MD5": exp.MD5Digest.from_arg_list, 238 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 239 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 240 } 241 242 AGG_FUNCTIONS = { 243 "count", 244 "min", 245 "max", 246 "sum", 247 "avg", 248 "any", 249 "stddevPop", 250 "stddevSamp", 251 "varPop", 252 "varSamp", 253 "corr", 254 "covarPop", 255 "covarSamp", 256 "entropy", 257 "exponentialMovingAverage", 258 "intervalLengthSum", 259 "kolmogorovSmirnovTest", 260 "mannWhitneyUTest", 261 "median", 262 "rankCorr", 263 "sumKahan", 264 "studentTTest", 265 "welchTTest", 266 "anyHeavy", 267 "anyLast", 268 "boundingRatio", 269 "first_value", 270 "last_value", 271 "argMin", 272 "argMax", 273 "avgWeighted", 274 "topK", 275 "topKWeighted", 276 "deltaSum", 277 "deltaSumTimestamp", 278 "groupArray", 279 "groupArrayLast", 280 "groupUniqArray", 281 "groupArrayInsertAt", 282 "groupArrayMovingAvg", 283 "groupArrayMovingSum", 284 "groupArraySample", 285 "groupBitAnd", 286 "groupBitOr", 287 "groupBitXor", 288 "groupBitmap", 289 "groupBitmapAnd", 290 "groupBitmapOr", 291 "groupBitmapXor", 292 "sumWithOverflow", 293 "sumMap", 294 "minMap", 295 "maxMap", 296 "skewSamp", 297 "skewPop", 298 "kurtSamp", 299 "kurtPop", 300 "uniq", 301 "uniqExact", 302 "uniqCombined", 303 "uniqCombined64", 304 "uniqHLL12", 305 "uniqTheta", 306 "quantile", 307 "quantiles", 308 "quantileExact", 309 "quantilesExact", 310 "quantileExactLow", 311 "quantilesExactLow", 312 "quantileExactHigh", 313 "quantilesExactHigh", 314 "quantileExactWeighted", 315 "quantilesExactWeighted", 316 "quantileTiming", 317 "quantilesTiming", 318 "quantileTimingWeighted", 319 "quantilesTimingWeighted", 320 "quantileDeterministic", 321 "quantilesDeterministic", 322 "quantileTDigest", 323 "quantilesTDigest", 324 "quantileTDigestWeighted", 325 "quantilesTDigestWeighted", 326 "quantileBFloat16", 327 "quantilesBFloat16", 328 "quantileBFloat16Weighted", 329 "quantilesBFloat16Weighted", 330 "simpleLinearRegression", 331 "stochasticLinearRegression", 332 "stochasticLogisticRegression", 333 "categoricalInformationValue", 334 "contingency", 335 "cramersV", 336 "cramersVBiasCorrected", 337 "theilsU", 338 "maxIntersections", 339 "maxIntersectionsPosition", 340 "meanZTest", 341 "quantileInterpolatedWeighted", 342 "quantilesInterpolatedWeighted", 343 "quantileGK", 344 "quantilesGK", 345 "sparkBar", 346 "sumCount", 347 "largestTriangleThreeBuckets", 348 "histogram", 349 "sequenceMatch", 350 "sequenceCount", 351 "windowFunnel", 352 "retention", 353 "uniqUpTo", 354 "sequenceNextNode", 355 "exponentialTimeDecayedAvg", 356 } 357 358 AGG_FUNCTIONS_SUFFIXES = [ 359 "If", 360 "Array", 361 "ArrayIf", 362 "Map", 363 "SimpleState", 364 "State", 365 "Merge", 366 "MergeState", 367 "ForEach", 368 "Distinct", 369 "OrDefault", 370 "OrNull", 371 "Resample", 372 "ArgMin", 373 "ArgMax", 374 ] 375 376 FUNC_TOKENS = { 377 *parser.Parser.FUNC_TOKENS, 378 TokenType.SET, 379 } 380 381 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 382 383 ID_VAR_TOKENS = { 384 *parser.Parser.ID_VAR_TOKENS, 385 TokenType.LIKE, 386 } 387 388 AGG_FUNC_MAPPING = ( 389 lambda functions, suffixes: { 390 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 391 } 392 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 393 394 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 395 396 FUNCTION_PARSERS = { 397 **parser.Parser.FUNCTION_PARSERS, 398 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 399 "QUANTILE": lambda self: self._parse_quantile(), 400 } 401 402 FUNCTION_PARSERS.pop("MATCH") 403 404 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 405 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 406 407 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 408 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 409 410 RANGE_PARSERS = { 411 **parser.Parser.RANGE_PARSERS, 412 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 413 and self._parse_in(this, is_global=True), 414 } 415 416 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 417 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 418 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 419 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 420 421 JOIN_KINDS = { 422 *parser.Parser.JOIN_KINDS, 423 TokenType.ANY, 424 TokenType.ASOF, 425 TokenType.ARRAY, 426 } 427 428 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 429 TokenType.ANY, 430 TokenType.ARRAY, 431 TokenType.FINAL, 432 TokenType.FORMAT, 433 TokenType.SETTINGS, 434 } 435 436 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 437 TokenType.FORMAT, 438 } 439 440 LOG_DEFAULTS_TO_LN = True 441 442 QUERY_MODIFIER_PARSERS = { 443 **parser.Parser.QUERY_MODIFIER_PARSERS, 444 TokenType.SETTINGS: lambda self: ( 445 "settings", 446 self._advance() or self._parse_csv(self._parse_assignment), 447 ), 448 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 449 } 450 451 CONSTRAINT_PARSERS = { 452 **parser.Parser.CONSTRAINT_PARSERS, 453 "INDEX": lambda self: self._parse_index_constraint(), 454 "CODEC": lambda self: self._parse_compress(), 455 } 456 457 ALTER_PARSERS = { 458 **parser.Parser.ALTER_PARSERS, 459 "REPLACE": lambda self: self._parse_alter_table_replace(), 460 } 461 462 SCHEMA_UNNAMED_CONSTRAINTS = { 463 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 464 "INDEX", 465 } 466 467 PLACEHOLDER_PARSERS = { 468 **parser.Parser.PLACEHOLDER_PARSERS, 469 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 470 } 471 472 def _parse_types( 473 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 474 ) -> t.Optional[exp.Expression]: 475 dtype = super()._parse_types( 476 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 477 ) 478 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 479 # Mark every type as non-nullable which is ClickHouse's default, unless it's 480 # already marked as nullable. This marker helps us transpile types from other 481 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 482 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 483 # fail in ClickHouse without the `Nullable` type constructor. 484 dtype.set("nullable", False) 485 486 return dtype 487 488 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 489 index = self._index 490 this = self._parse_bitwise() 491 if self._match(TokenType.FROM): 492 self._retreat(index) 493 return super()._parse_extract() 494 495 # We return Anonymous here because extract and regexpExtract have different semantics, 496 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 497 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 498 # 499 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 500 self._match(TokenType.COMMA) 501 return self.expression( 502 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 503 ) 504 505 def _parse_assignment(self) -> t.Optional[exp.Expression]: 506 this = super()._parse_assignment() 507 508 if self._match(TokenType.PLACEHOLDER): 509 return self.expression( 510 exp.If, 511 this=this, 512 true=self._parse_assignment(), 513 false=self._match(TokenType.COLON) and self._parse_assignment(), 514 ) 515 516 return this 517 518 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 519 """ 520 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 521 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 522 """ 523 this = self._parse_id_var() 524 self._match(TokenType.COLON) 525 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 526 self._match_text_seq("IDENTIFIER") and "Identifier" 527 ) 528 529 if not kind: 530 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 531 elif not self._match(TokenType.R_BRACE): 532 self.raise_error("Expecting }") 533 534 return self.expression(exp.Placeholder, this=this, kind=kind) 535 536 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 537 this = super()._parse_in(this) 538 this.set("is_global", is_global) 539 return this 540 541 def _parse_table( 542 self, 543 schema: bool = False, 544 joins: bool = False, 545 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 546 parse_bracket: bool = False, 547 is_db_reference: bool = False, 548 parse_partition: bool = False, 549 ) -> t.Optional[exp.Expression]: 550 this = super()._parse_table( 551 schema=schema, 552 joins=joins, 553 alias_tokens=alias_tokens, 554 parse_bracket=parse_bracket, 555 is_db_reference=is_db_reference, 556 ) 557 558 if self._match(TokenType.FINAL): 559 this = self.expression(exp.Final, this=this) 560 561 return this 562 563 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 564 return super()._parse_position(haystack_first=True) 565 566 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 567 def _parse_cte(self) -> exp.CTE: 568 # WITH <identifier> AS <subquery expression> 569 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 570 571 if not cte: 572 # WITH <expression> AS <identifier> 573 cte = self.expression( 574 exp.CTE, 575 this=self._parse_assignment(), 576 alias=self._parse_table_alias(), 577 scalar=True, 578 ) 579 580 return cte 581 582 def _parse_join_parts( 583 self, 584 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 585 is_global = self._match(TokenType.GLOBAL) and self._prev 586 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 587 588 if kind_pre: 589 kind = self._match_set(self.JOIN_KINDS) and self._prev 590 side = self._match_set(self.JOIN_SIDES) and self._prev 591 return is_global, side, kind 592 593 return ( 594 is_global, 595 self._match_set(self.JOIN_SIDES) and self._prev, 596 self._match_set(self.JOIN_KINDS) and self._prev, 597 ) 598 599 def _parse_join( 600 self, skip_join_token: bool = False, parse_bracket: bool = False 601 ) -> t.Optional[exp.Join]: 602 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 603 if join: 604 join.set("global", join.args.pop("method", None)) 605 606 return join 607 608 def _parse_function( 609 self, 610 functions: t.Optional[t.Dict[str, t.Callable]] = None, 611 anonymous: bool = False, 612 optional_parens: bool = True, 613 any_token: bool = False, 614 ) -> t.Optional[exp.Expression]: 615 expr = super()._parse_function( 616 functions=functions, 617 anonymous=anonymous, 618 optional_parens=optional_parens, 619 any_token=any_token, 620 ) 621 622 func = expr.this if isinstance(expr, exp.Window) else expr 623 624 # Aggregate functions can be split in 2 parts: <func_name><suffix> 625 parts = ( 626 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 627 ) 628 629 if parts: 630 params = self._parse_func_params(func) 631 632 kwargs = { 633 "this": func.this, 634 "expressions": func.expressions, 635 } 636 if parts[1]: 637 kwargs["parts"] = parts 638 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 639 else: 640 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 641 642 kwargs["exp_class"] = exp_class 643 if params: 644 kwargs["params"] = params 645 646 func = self.expression(**kwargs) 647 648 if isinstance(expr, exp.Window): 649 # The window's func was parsed as Anonymous in base parser, fix its 650 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 651 expr.set("this", func) 652 elif params: 653 # Params have blocked super()._parse_function() from parsing the following window 654 # (if that exists) as they're standing between the function call and the window spec 655 expr = self._parse_window(func) 656 else: 657 expr = func 658 659 return expr 660 661 def _parse_func_params( 662 self, this: t.Optional[exp.Func] = None 663 ) -> t.Optional[t.List[exp.Expression]]: 664 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 665 return self._parse_csv(self._parse_lambda) 666 667 if self._match(TokenType.L_PAREN): 668 params = self._parse_csv(self._parse_lambda) 669 self._match_r_paren(this) 670 return params 671 672 return None 673 674 def _parse_quantile(self) -> exp.Quantile: 675 this = self._parse_lambda() 676 params = self._parse_func_params() 677 if params: 678 return self.expression(exp.Quantile, this=params[0], quantile=this) 679 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 680 681 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 682 return super()._parse_wrapped_id_vars(optional=True) 683 684 def _parse_primary_key( 685 self, wrapped_optional: bool = False, in_props: bool = False 686 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 687 return super()._parse_primary_key( 688 wrapped_optional=wrapped_optional or in_props, in_props=in_props 689 ) 690 691 def _parse_on_property(self) -> t.Optional[exp.Expression]: 692 index = self._index 693 if self._match_text_seq("CLUSTER"): 694 this = self._parse_id_var() 695 if this: 696 return self.expression(exp.OnCluster, this=this) 697 else: 698 self._retreat(index) 699 return None 700 701 def _parse_index_constraint( 702 self, kind: t.Optional[str] = None 703 ) -> exp.IndexColumnConstraint: 704 # INDEX name1 expr TYPE type1(args) GRANULARITY value 705 this = self._parse_id_var() 706 expression = self._parse_assignment() 707 708 index_type = self._match_text_seq("TYPE") and ( 709 self._parse_function() or self._parse_var() 710 ) 711 712 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 713 714 return self.expression( 715 exp.IndexColumnConstraint, 716 this=this, 717 expression=expression, 718 index_type=index_type, 719 granularity=granularity, 720 ) 721 722 def _parse_partition(self) -> t.Optional[exp.Partition]: 723 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 724 if not self._match(TokenType.PARTITION): 725 return None 726 727 if self._match_text_seq("ID"): 728 # Corresponds to the PARTITION ID <string_value> syntax 729 expressions: t.List[exp.Expression] = [ 730 self.expression(exp.PartitionId, this=self._parse_string()) 731 ] 732 else: 733 expressions = self._parse_expressions() 734 735 return self.expression(exp.Partition, expressions=expressions) 736 737 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 738 partition = self._parse_partition() 739 740 if not partition or not self._match(TokenType.FROM): 741 return None 742 743 return self.expression( 744 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 745 ) 746 747 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 748 if not self._match_text_seq("PROJECTION"): 749 return None 750 751 return self.expression( 752 exp.ProjectionDef, 753 this=self._parse_id_var(), 754 expression=self._parse_wrapped(self._parse_statement), 755 ) 756 757 def _parse_constraint(self) -> t.Optional[exp.Expression]: 758 return super()._parse_constraint() or self._parse_projection_def() 759 760 def _parse_alias( 761 self, this: t.Optional[exp.Expression], explicit: bool = False 762 ) -> t.Optional[exp.Expression]: 763 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 764 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 765 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 766 return this 767 768 return super()._parse_alias(this=this, explicit=explicit) 769 770 def _parse_expression(self) -> t.Optional[exp.Expression]: 771 this = super()._parse_expression() 772 773 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 774 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 775 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 776 self._match(TokenType.R_PAREN) 777 778 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
780 class Generator(generator.Generator): 781 QUERY_HINTS = False 782 STRUCT_DELIMITER = ("(", ")") 783 NVL2_SUPPORTED = False 784 TABLESAMPLE_REQUIRES_PARENS = False 785 TABLESAMPLE_SIZE_IS_ROWS = False 786 TABLESAMPLE_KEYWORDS = "SAMPLE" 787 LAST_DAY_SUPPORTS_DATE_PART = False 788 CAN_IMPLEMENT_ARRAY_ANY = True 789 SUPPORTS_TO_NUMBER = False 790 JOIN_HINTS = False 791 TABLE_HINTS = False 792 GROUPINGS_SEP = "" 793 SET_OP_MODIFIERS = False 794 SUPPORTS_TABLE_ALIAS_COLUMNS = False 795 VALUES_AS_TABLE = False 796 797 STRING_TYPE_MAPPING = { 798 exp.DataType.Type.CHAR: "String", 799 exp.DataType.Type.LONGBLOB: "String", 800 exp.DataType.Type.LONGTEXT: "String", 801 exp.DataType.Type.MEDIUMBLOB: "String", 802 exp.DataType.Type.MEDIUMTEXT: "String", 803 exp.DataType.Type.TINYBLOB: "String", 804 exp.DataType.Type.TINYTEXT: "String", 805 exp.DataType.Type.TEXT: "String", 806 exp.DataType.Type.VARBINARY: "String", 807 exp.DataType.Type.VARCHAR: "String", 808 } 809 810 SUPPORTED_JSON_PATH_PARTS = { 811 exp.JSONPathKey, 812 exp.JSONPathRoot, 813 exp.JSONPathSubscript, 814 } 815 816 TYPE_MAPPING = { 817 **generator.Generator.TYPE_MAPPING, 818 **STRING_TYPE_MAPPING, 819 exp.DataType.Type.ARRAY: "Array", 820 exp.DataType.Type.BIGINT: "Int64", 821 exp.DataType.Type.DATE32: "Date32", 822 exp.DataType.Type.DATETIME: "DateTime", 823 exp.DataType.Type.DATETIME64: "DateTime64", 824 exp.DataType.Type.TIMESTAMP: "DateTime", 825 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 826 exp.DataType.Type.DOUBLE: "Float64", 827 exp.DataType.Type.ENUM: "Enum", 828 exp.DataType.Type.ENUM8: "Enum8", 829 exp.DataType.Type.ENUM16: "Enum16", 830 exp.DataType.Type.FIXEDSTRING: "FixedString", 831 exp.DataType.Type.FLOAT: "Float32", 832 exp.DataType.Type.INT: "Int32", 833 exp.DataType.Type.MEDIUMINT: "Int32", 834 exp.DataType.Type.INT128: "Int128", 835 exp.DataType.Type.INT256: "Int256", 836 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 837 exp.DataType.Type.MAP: "Map", 838 exp.DataType.Type.NESTED: "Nested", 839 exp.DataType.Type.SMALLINT: "Int16", 840 exp.DataType.Type.STRUCT: "Tuple", 841 exp.DataType.Type.TINYINT: "Int8", 842 exp.DataType.Type.UBIGINT: "UInt64", 843 exp.DataType.Type.UINT: "UInt32", 844 exp.DataType.Type.UINT128: "UInt128", 845 exp.DataType.Type.UINT256: "UInt256", 846 exp.DataType.Type.USMALLINT: "UInt16", 847 exp.DataType.Type.UTINYINT: "UInt8", 848 exp.DataType.Type.IPV4: "IPv4", 849 exp.DataType.Type.IPV6: "IPv6", 850 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 851 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 852 } 853 854 TRANSFORMS = { 855 **generator.Generator.TRANSFORMS, 856 exp.AnyValue: rename_func("any"), 857 exp.ApproxDistinct: rename_func("uniq"), 858 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 859 exp.ArraySize: rename_func("LENGTH"), 860 exp.ArraySum: rename_func("arraySum"), 861 exp.ArgMax: arg_max_or_min_no_count("argMax"), 862 exp.ArgMin: arg_max_or_min_no_count("argMin"), 863 exp.Array: inline_array_sql, 864 exp.CastToStrType: rename_func("CAST"), 865 exp.CountIf: rename_func("countIf"), 866 exp.CompressColumnConstraint: lambda self, 867 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 868 exp.ComputedColumnConstraint: lambda self, 869 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 870 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 871 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 872 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 873 exp.DateStrToDate: rename_func("toDate"), 874 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 875 exp.Explode: rename_func("arrayJoin"), 876 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 877 exp.IsNan: rename_func("isNaN"), 878 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 879 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 880 exp.JSONPathKey: json_path_key_only_name, 881 exp.JSONPathRoot: lambda *_: "", 882 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 883 exp.Nullif: rename_func("nullIf"), 884 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 885 exp.Pivot: no_pivot_sql, 886 exp.Quantile: _quantile_sql, 887 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 888 exp.Rand: rename_func("randCanonical"), 889 exp.StartsWith: rename_func("startsWith"), 890 exp.StrPosition: lambda self, e: self.func( 891 "position", e.this, e.args.get("substr"), e.args.get("position") 892 ), 893 exp.TimeToStr: lambda self, e: self.func( 894 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 895 ), 896 exp.TimeStrToTime: _timestrtotime_sql, 897 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 898 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 899 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 900 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 901 exp.MD5Digest: rename_func("MD5"), 902 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 903 exp.SHA: rename_func("SHA1"), 904 exp.SHA2: sha256_sql, 905 exp.UnixToTime: _unix_to_time_sql, 906 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 907 exp.Trim: trim_sql, 908 exp.Variance: rename_func("varSamp"), 909 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 910 exp.Stddev: rename_func("stddevSamp"), 911 exp.Chr: rename_func("CHAR"), 912 exp.Lag: lambda self, e: self.func( 913 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 914 ), 915 exp.Lead: lambda self, e: self.func( 916 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 917 ), 918 } 919 920 PROPERTIES_LOCATION = { 921 **generator.Generator.PROPERTIES_LOCATION, 922 exp.OnCluster: exp.Properties.Location.POST_NAME, 923 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 924 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 925 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 926 } 927 928 # There's no list in docs, but it can be found in Clickhouse code 929 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 930 ON_CLUSTER_TARGETS = { 931 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 932 "DATABASE", 933 "TABLE", 934 "VIEW", 935 "DICTIONARY", 936 "INDEX", 937 "FUNCTION", 938 "NAMED COLLECTION", 939 } 940 941 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 942 NON_NULLABLE_TYPES = { 943 exp.DataType.Type.ARRAY, 944 exp.DataType.Type.MAP, 945 exp.DataType.Type.STRUCT, 946 } 947 948 def strtodate_sql(self, expression: exp.StrToDate) -> str: 949 strtodate_sql = self.function_fallback_sql(expression) 950 951 if not isinstance(expression.parent, exp.Cast): 952 # StrToDate returns DATEs in other dialects (eg. postgres), so 953 # this branch aims to improve the transpilation to clickhouse 954 return f"CAST({strtodate_sql} AS DATE)" 955 956 return strtodate_sql 957 958 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 959 this = expression.this 960 961 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 962 return self.sql(this) 963 964 return super().cast_sql(expression, safe_prefix=safe_prefix) 965 966 def trycast_sql(self, expression: exp.TryCast) -> str: 967 dtype = expression.to 968 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 969 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 970 dtype.set("nullable", True) 971 972 return super().cast_sql(expression) 973 974 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 975 this = self.json_path_part(expression.this) 976 return str(int(this) + 1) if is_int(this) else this 977 978 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 979 return f"AS {self.sql(expression, 'this')}" 980 981 def _any_to_has( 982 self, 983 expression: exp.EQ | exp.NEQ, 984 default: t.Callable[[t.Any], str], 985 prefix: str = "", 986 ) -> str: 987 if isinstance(expression.left, exp.Any): 988 arr = expression.left 989 this = expression.right 990 elif isinstance(expression.right, exp.Any): 991 arr = expression.right 992 this = expression.left 993 else: 994 return default(expression) 995 996 return prefix + self.func("has", arr.this.unnest(), this) 997 998 def eq_sql(self, expression: exp.EQ) -> str: 999 return self._any_to_has(expression, super().eq_sql) 1000 1001 def neq_sql(self, expression: exp.NEQ) -> str: 1002 return self._any_to_has(expression, super().neq_sql, "NOT ") 1003 1004 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1005 # Manually add a flag to make the search case-insensitive 1006 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1007 return self.func("match", expression.this, regex) 1008 1009 def datatype_sql(self, expression: exp.DataType) -> str: 1010 # String is the standard ClickHouse type, every other variant is just an alias. 1011 # Additionally, any supplied length parameter will be ignored. 1012 # 1013 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1014 if expression.this in self.STRING_TYPE_MAPPING: 1015 dtype = "String" 1016 else: 1017 dtype = super().datatype_sql(expression) 1018 1019 # This section changes the type to `Nullable(...)` if the following conditions hold: 1020 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1021 # and change their semantics 1022 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1023 # constraint: "Type of Map key must be a type, that can be represented by integer or 1024 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1025 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1026 parent = expression.parent 1027 nullable = expression.args.get("nullable") 1028 if nullable is True or ( 1029 nullable is None 1030 and not ( 1031 isinstance(parent, exp.DataType) 1032 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1033 and expression.index in (None, 0) 1034 ) 1035 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1036 ): 1037 dtype = f"Nullable({dtype})" 1038 1039 return dtype 1040 1041 def cte_sql(self, expression: exp.CTE) -> str: 1042 if expression.args.get("scalar"): 1043 this = self.sql(expression, "this") 1044 alias = self.sql(expression, "alias") 1045 return f"{this} AS {alias}" 1046 1047 return super().cte_sql(expression) 1048 1049 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1050 return super().after_limit_modifiers(expression) + [ 1051 ( 1052 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1053 if expression.args.get("settings") 1054 else "" 1055 ), 1056 ( 1057 self.seg("FORMAT ") + self.sql(expression, "format") 1058 if expression.args.get("format") 1059 else "" 1060 ), 1061 ] 1062 1063 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1064 params = self.expressions(expression, key="params", flat=True) 1065 return self.func(expression.name, *expression.expressions) + f"({params})" 1066 1067 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1068 return self.func(expression.name, *expression.expressions) 1069 1070 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1071 return self.anonymousaggfunc_sql(expression) 1072 1073 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1074 return self.parameterizedagg_sql(expression) 1075 1076 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1077 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1078 1079 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1080 return f"ON CLUSTER {self.sql(expression, 'this')}" 1081 1082 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1083 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1084 exp.Properties.Location.POST_NAME 1085 ): 1086 this_name = self.sql( 1087 expression.this if isinstance(expression.this, exp.Schema) else expression, 1088 "this", 1089 ) 1090 this_properties = " ".join( 1091 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1092 ) 1093 this_schema = self.schema_columns_sql(expression.this) 1094 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1095 1096 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1097 1098 return super().createable_sql(expression, locations) 1099 1100 def create_sql(self, expression: exp.Create) -> str: 1101 # The comment property comes last in CTAS statements, i.e. after the query 1102 query = expression.expression 1103 if isinstance(query, exp.Query): 1104 comment_prop = expression.find(exp.SchemaCommentProperty) 1105 if comment_prop: 1106 comment_prop.pop() 1107 query.replace(exp.paren(query)) 1108 else: 1109 comment_prop = None 1110 1111 create_sql = super().create_sql(expression) 1112 1113 comment_sql = self.sql(comment_prop) 1114 comment_sql = f" {comment_sql}" if comment_sql else "" 1115 1116 return f"{create_sql}{comment_sql}" 1117 1118 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1119 this = self.indent(self.sql(expression, "this")) 1120 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1121 1122 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1123 this = self.sql(expression, "this") 1124 this = f" {this}" if this else "" 1125 expr = self.sql(expression, "expression") 1126 expr = f" {expr}" if expr else "" 1127 index_type = self.sql(expression, "index_type") 1128 index_type = f" TYPE {index_type}" if index_type else "" 1129 granularity = self.sql(expression, "granularity") 1130 granularity = f" GRANULARITY {granularity}" if granularity else "" 1131 1132 return f"INDEX{this}{expr}{index_type}{granularity}" 1133 1134 def partition_sql(self, expression: exp.Partition) -> str: 1135 return f"PARTITION {self.expressions(expression, flat=True)}" 1136 1137 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1138 return f"ID {self.sql(expression.this)}" 1139 1140 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1141 return ( 1142 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1143 ) 1144 1145 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1146 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
948 def strtodate_sql(self, expression: exp.StrToDate) -> str: 949 strtodate_sql = self.function_fallback_sql(expression) 950 951 if not isinstance(expression.parent, exp.Cast): 952 # StrToDate returns DATEs in other dialects (eg. postgres), so 953 # this branch aims to improve the transpilation to clickhouse 954 return f"CAST({strtodate_sql} AS DATE)" 955 956 return strtodate_sql
958 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 959 this = expression.this 960 961 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 962 return self.sql(this) 963 964 return super().cast_sql(expression, safe_prefix=safe_prefix)
966 def trycast_sql(self, expression: exp.TryCast) -> str: 967 dtype = expression.to 968 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 969 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 970 dtype.set("nullable", True) 971 972 return super().cast_sql(expression)
1009 def datatype_sql(self, expression: exp.DataType) -> str: 1010 # String is the standard ClickHouse type, every other variant is just an alias. 1011 # Additionally, any supplied length parameter will be ignored. 1012 # 1013 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1014 if expression.this in self.STRING_TYPE_MAPPING: 1015 dtype = "String" 1016 else: 1017 dtype = super().datatype_sql(expression) 1018 1019 # This section changes the type to `Nullable(...)` if the following conditions hold: 1020 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1021 # and change their semantics 1022 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1023 # constraint: "Type of Map key must be a type, that can be represented by integer or 1024 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1025 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1026 parent = expression.parent 1027 nullable = expression.args.get("nullable") 1028 if nullable is True or ( 1029 nullable is None 1030 and not ( 1031 isinstance(parent, exp.DataType) 1032 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1033 and expression.index in (None, 0) 1034 ) 1035 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1036 ): 1037 dtype = f"Nullable({dtype})" 1038 1039 return dtype
1049 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1050 return super().after_limit_modifiers(expression) + [ 1051 ( 1052 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1053 if expression.args.get("settings") 1054 else "" 1055 ), 1056 ( 1057 self.seg("FORMAT ") + self.sql(expression, "format") 1058 if expression.args.get("format") 1059 else "" 1060 ), 1061 ]
1082 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1083 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1084 exp.Properties.Location.POST_NAME 1085 ): 1086 this_name = self.sql( 1087 expression.this if isinstance(expression.this, exp.Schema) else expression, 1088 "this", 1089 ) 1090 this_properties = " ".join( 1091 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1092 ) 1093 this_schema = self.schema_columns_sql(expression.this) 1094 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1095 1096 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1097 1098 return super().createable_sql(expression, locations)
1100 def create_sql(self, expression: exp.Create) -> str: 1101 # The comment property comes last in CTAS statements, i.e. after the query 1102 query = expression.expression 1103 if isinstance(query, exp.Query): 1104 comment_prop = expression.find(exp.SchemaCommentProperty) 1105 if comment_prop: 1106 comment_prop.pop() 1107 query.replace(exp.paren(query)) 1108 else: 1109 comment_prop = None 1110 1111 create_sql = super().create_sql(expression) 1112 1113 comment_sql = self.sql(comment_prop) 1114 comment_sql = f" {comment_sql}" if comment_sql else "" 1115 1116 return f"{create_sql}{comment_sql}"
1122 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1123 this = self.sql(expression, "this") 1124 this = f" {this}" if this else "" 1125 expr = self.sql(expression, "expression") 1126 expr = f" {expr}" if expr else "" 1127 index_type = self.sql(expression, "index_type") 1128 index_type = f" TYPE {index_type}" if index_type else "" 1129 granularity = self.sql(expression, "granularity") 1130 granularity = f" GRANULARITY {granularity}" if granularity else "" 1131 1132 return f"INDEX{this}{expr}{index_type}{granularity}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql