sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t
import datetime

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql


def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime) -> str:
    tz = expression.args.get("zone")
    datatype = exp.DataType.build(exp.DataType.Type.TIMESTAMP)
    ts = expression.this
    if tz:
        # build a datatype that encodes the timezone as a type parameter,
        # e.g. DateTime('America/Los_Angeles')
        datatype = exp.DataType.build(
            exp.DataType.Type.TIMESTAMPTZ,  # Type.TIMESTAMPTZ maps to DateTime
            expressions=[exp.DataTypeParam(this=tz)],
        )

    if isinstance(ts, exp.Literal):
        # strip the timezone out of the literal, e.g. turn '2020-01-01 12:13:14-08:00' into
        # '2020-01-01 12:13:14', because ClickHouse encodes the timezone as a data type
        # parameter and throws an error if it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts.name).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))
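
# A hedged usage sketch (illustrative only, not part of the module): the helpers
# above are wired into the dialect classes below. For instance, _build_count_if
# maps a single-argument countIf onto exp.CountIf:
#
#     import sqlglot
#     from sqlglot import exp
#
#     node = sqlglot.parse_one("SELECT countIf(x > 1) FROM t", read="clickhouse")
#     assert isinstance(node.selects[0], exp.CountIf)
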
class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }
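
    # A hedged sketch of the Tokenizer settings above (illustrative only):
    # ClickHouse accepts `#` line comments and backtick-quoted identifiers.
    #
    #     import sqlglot
    #
    #     sql = "SELECT `a` FROM t # trailing comment"
    #     node = sqlglot.parse_one(sql, read="clickhouse")
    #     assert node.selects[0].name == "a"
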
    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }
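
        # A hedged sketch of the FUNCTIONS overrides above (illustrative only):
        # "UNIQ" parses into exp.ApproxDistinct, which other dialects can then
        # render with their own spelling.
        #
        #     import sqlglot
        #     from sqlglot import exp
        #
        #     node = sqlglot.parse_one("SELECT uniq(user_id) FROM t", read="clickhouse")
        #     assert isinstance(node.selects[0], exp.ApproxDistinct)
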
"uniqUpTo", 353 "sequenceNextNode", 354 "exponentialTimeDecayedAvg", 355 } 356 357 AGG_FUNCTIONS_SUFFIXES = [ 358 "If", 359 "Array", 360 "ArrayIf", 361 "Map", 362 "SimpleState", 363 "State", 364 "Merge", 365 "MergeState", 366 "ForEach", 367 "Distinct", 368 "OrDefault", 369 "OrNull", 370 "Resample", 371 "ArgMin", 372 "ArgMax", 373 ] 374 375 FUNC_TOKENS = { 376 *parser.Parser.FUNC_TOKENS, 377 TokenType.SET, 378 } 379 380 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 381 382 ID_VAR_TOKENS = { 383 *parser.Parser.ID_VAR_TOKENS, 384 TokenType.LIKE, 385 } 386 387 AGG_FUNC_MAPPING = ( 388 lambda functions, suffixes: { 389 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 390 } 391 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 392 393 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 394 395 FUNCTION_PARSERS = { 396 **parser.Parser.FUNCTION_PARSERS, 397 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 398 "QUANTILE": lambda self: self._parse_quantile(), 399 } 400 401 FUNCTION_PARSERS.pop("MATCH") 402 403 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 404 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 405 406 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 407 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 408 409 RANGE_PARSERS = { 410 **parser.Parser.RANGE_PARSERS, 411 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 412 and self._parse_in(this, is_global=True), 413 } 414 415 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 416 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 417 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 418 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 419 420 JOIN_KINDS = { 421 *parser.Parser.JOIN_KINDS, 422 TokenType.ANY, 423 TokenType.ASOF, 424 TokenType.ARRAY, 425 } 426 427 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 428 TokenType.ANY, 429 TokenType.ARRAY, 430 TokenType.FINAL, 431 TokenType.FORMAT, 432 TokenType.SETTINGS, 433 } 434 435 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 436 TokenType.FORMAT, 437 } 438 439 LOG_DEFAULTS_TO_LN = True 440 441 QUERY_MODIFIER_PARSERS = { 442 **parser.Parser.QUERY_MODIFIER_PARSERS, 443 TokenType.SETTINGS: lambda self: ( 444 "settings", 445 self._advance() or self._parse_csv(self._parse_assignment), 446 ), 447 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 448 } 449 450 CONSTRAINT_PARSERS = { 451 **parser.Parser.CONSTRAINT_PARSERS, 452 "INDEX": lambda self: self._parse_index_constraint(), 453 "CODEC": lambda self: self._parse_compress(), 454 } 455 456 ALTER_PARSERS = { 457 **parser.Parser.ALTER_PARSERS, 458 "REPLACE": lambda self: self._parse_alter_table_replace(), 459 } 460 461 SCHEMA_UNNAMED_CONSTRAINTS = { 462 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 463 "INDEX", 464 } 465 466 PLACEHOLDER_PARSERS = { 467 **parser.Parser.PLACEHOLDER_PARSERS, 468 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 469 } 470 471 def _parse_types( 472 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 473 ) -> t.Optional[exp.Expression]: 474 dtype = super()._parse_types( 475 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 476 ) 477 if isinstance(dtype, exp.DataType): 478 # Mark every type as non-nullable which is ClickHouse's default. 
        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType):
                # Mark every type as non-nullable, which is ClickHouse's default. This marker
                # helps us transpile types from other dialects to ClickHouse, so that we can
                # e.g. produce `CAST(x AS Nullable(String))` from `CAST(x AS TEXT)`. If there
                # is a `NULL` value in `x`, the former would fail in ClickHouse without the
                # `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte
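
        # A hedged sketch of the two WITH forms handled by _parse_cte above
        # (illustrative only); the scalar form is re-emitted by cte_sql in the
        # Generator below.
        #
        #     import sqlglot
        #
        #     sql = "WITH 10 AS n SELECT n"
        #     print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])
        #     # expected, roughly: WITH 10 AS n SELECT n
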
        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                params = self._parse_func_params(func)

                kwargs = {
                    "this": func.this,
                    "expressions": func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )
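
        # A hedged sketch of _parse_quantile above (illustrative only): the
        # parameterized spelling quantile(level)(expr) yields exp.Quantile.
        #
        #     import sqlglot
        #     from sqlglot import exp
        #
        #     node = sqlglot.parse_one("SELECT quantile(0.9)(latency) FROM t", read="clickhouse")
        #     assert isinstance(node.selects[0], exp.Quantile)
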
        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()
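
        # A hedged sketch of the ALTER ... REPLACE PARTITION handling above
        # (illustrative only); exact output formatting may vary.
        #
        #     import sqlglot
        #
        #     sql = "ALTER TABLE t1 REPLACE PARTITION ID '20240101' FROM t2"
        #     print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])
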
    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        VALUES_AS_TABLE = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NULLABLE: "Nullable",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }
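
        # A hedged sketch of TYPE_MAPPING above (illustrative only): combined
        # with the nullable marking done in _parse_types, a generic TEXT cast
        # becomes a nullable ClickHouse String (see datatype_sql below).
        #
        #     import sqlglot
        #
        #     print(sqlglot.transpile("SELECT CAST(x AS TEXT)", write="clickhouse")[0])
        #     # expected, roughly: SELECT CAST(x AS Nullable(String))
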
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySize: rename_func("LENGTH"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "DATE_FORMAT", e.this, self.format_time(e), e.args.get("zone")
            ),
            exp.TimeStrToTime: _timestrtotime_sql,
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Trim: trim_sql,
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
            exp.Chr: lambda self, e: self.func("char", e.this),
            exp.Lag: lambda self, e: self.func(
                "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Lead: lambda self, e: self.func(
                "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
        }

        # There's no list in the docs, but it can be found in ClickHouse code,
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.NULLABLE,
            exp.DataType.Type.STRUCT,
        }
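
        # A hedged sketch of the TRANSFORMS table above (illustrative only):
        # e.g. the exp.RegexpLike entry renders as ClickHouse's match().
        #
        #     import sqlglot
        #
        #     print(sqlglot.transpile("SELECT x RLIKE 'ab+'", read="mysql", write="clickhouse")[0])
        #     # expected, roughly: SELECT match(x, 'ab+')
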
        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (e.g. Postgres), so
                # this branch aims to improve the transpilation to ClickHouse
                return f"CAST({strtodate_sql} AS DATE)"

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)
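
        # A hedged sketch of trycast_sql above (illustrative only): instead of
        # a TRY_CAST keyword, the target type is made Nullable.
        #
        #     import sqlglot
        #
        #     print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", write="clickhouse")[0])
        #     # expected, roughly: SELECT CAST(x AS Nullable(Int32))
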
        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            if (
                expression.args.get("nullable") is not False
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

            return super().createable_sql(expression, locations)
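
        # A hedged sketch of placeholder_sql above (illustrative only): query
        # parameters keep their {name: Type} shape through a round trip.
        #
        #     import sqlglot
        #
        #     print(sqlglot.transpile("SELECT {uid: UInt32}", read="clickhouse", write="clickhouse")[0])
        #     # expected, roughly: SELECT {uid: UInt32}
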
        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
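
# A hedged sketch of create_sql above (illustrative only): in a CTAS, the
# COMMENT property is popped and re-emitted after the parenthesized query.
#
#     import sqlglot
#
#     sql = "CREATE TABLE t COMMENT 'note' AS SELECT 1"
#     print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])
#     # expected, roughly: CREATE TABLE t AS (SELECT 1) COMMENT 'note'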
exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 884 exp.SHA: rename_func("SHA1"), 885 exp.SHA2: sha256_sql, 886 exp.UnixToTime: _unix_to_time_sql, 887 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 888 exp.Trim: trim_sql, 889 exp.Variance: rename_func("varSamp"), 890 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 891 exp.Stddev: rename_func("stddevSamp"), 892 exp.Chr: lambda self, e: self.func("char", e.this), 893 exp.Lag: lambda self, e: self.func( 894 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 895 ), 896 exp.Lead: lambda self, e: self.func( 897 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 898 ), 899 } 900 901 PROPERTIES_LOCATION = { 902 **generator.Generator.PROPERTIES_LOCATION, 903 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 904 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 905 exp.OnCluster: exp.Properties.Location.POST_NAME, 906 } 907 908 # There's no list in docs, but it can be found in Clickhouse code 909 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 910 ON_CLUSTER_TARGETS = { 911 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 912 "DATABASE", 913 "TABLE", 914 "VIEW", 915 "DICTIONARY", 916 "INDEX", 917 "FUNCTION", 918 "NAMED COLLECTION", 919 } 920 921 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 922 NON_NULLABLE_TYPES = { 923 exp.DataType.Type.ARRAY, 924 exp.DataType.Type.MAP, 925 exp.DataType.Type.NULLABLE, 926 exp.DataType.Type.STRUCT, 927 } 928 929 def strtodate_sql(self, expression: exp.StrToDate) -> str: 930 strtodate_sql = self.function_fallback_sql(expression) 931 932 if not isinstance(expression.parent, exp.Cast): 933 # StrToDate returns DATEs in other dialects (eg. 
postgres), so 934 # this branch aims to improve the transpilation to clickhouse 935 return f"CAST({strtodate_sql} AS DATE)" 936 937 return strtodate_sql 938 939 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 940 this = expression.this 941 942 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 943 return self.sql(this) 944 945 return super().cast_sql(expression, safe_prefix=safe_prefix) 946 947 def trycast_sql(self, expression: exp.TryCast) -> str: 948 dtype = expression.to 949 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 950 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 951 dtype.set("nullable", True) 952 953 return super().cast_sql(expression) 954 955 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 956 this = self.json_path_part(expression.this) 957 return str(int(this) + 1) if is_int(this) else this 958 959 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 960 return f"AS {self.sql(expression, 'this')}" 961 962 def _any_to_has( 963 self, 964 expression: exp.EQ | exp.NEQ, 965 default: t.Callable[[t.Any], str], 966 prefix: str = "", 967 ) -> str: 968 if isinstance(expression.left, exp.Any): 969 arr = expression.left 970 this = expression.right 971 elif isinstance(expression.right, exp.Any): 972 arr = expression.right 973 this = expression.left 974 else: 975 return default(expression) 976 977 return prefix + self.func("has", arr.this.unnest(), this) 978 979 def eq_sql(self, expression: exp.EQ) -> str: 980 return self._any_to_has(expression, super().eq_sql) 981 982 def neq_sql(self, expression: exp.NEQ) -> str: 983 return self._any_to_has(expression, super().neq_sql, "NOT ") 984 985 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 986 # Manually add a flag to make the search case-insensitive 987 regex = self.func("CONCAT", "'(?i)'", expression.expression) 988 return self.func("match", expression.this, regex) 989 990 def datatype_sql(self, expression: exp.DataType) -> str: 991 # String is the standard ClickHouse type, every other variant is just an alias. 992 # Additionally, any supplied length parameter will be ignored. 993 # 994 # https://clickhouse.com/docs/en/sql-reference/data-types/string 995 if expression.this in self.STRING_TYPE_MAPPING: 996 dtype = "String" 997 else: 998 dtype = super().datatype_sql(expression) 999 1000 # This section changes the type to `Nullable(...)` if the following conditions hold: 1001 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1002 # and change their semantics 1003 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1004 # constraint: "Type of Map key must be a type, that can be represented by integer or 1005 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1006 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 1007 parent = expression.parent 1008 if ( 1009 expression.args.get("nullable") is not False 1010 and not ( 1011 isinstance(parent, exp.DataType) 1012 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1013 and expression.index in (None, 0) 1014 ) 1015 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1016 ): 1017 dtype = f"Nullable({dtype})" 1018 1019 return dtype 1020 1021 def cte_sql(self, expression: exp.CTE) -> str: 1022 if expression.args.get("scalar"): 1023 this = self.sql(expression, "this") 1024 alias = self.sql(expression, "alias") 1025 return f"{this} AS {alias}" 1026 1027 return super().cte_sql(expression) 1028 1029 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1030 return super().after_limit_modifiers(expression) + [ 1031 ( 1032 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1033 if expression.args.get("settings") 1034 else "" 1035 ), 1036 ( 1037 self.seg("FORMAT ") + self.sql(expression, "format") 1038 if expression.args.get("format") 1039 else "" 1040 ), 1041 ] 1042 1043 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1044 params = self.expressions(expression, key="params", flat=True) 1045 return self.func(expression.name, *expression.expressions) + f"({params})" 1046 1047 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1048 return self.func(expression.name, *expression.expressions) 1049 1050 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1051 return self.anonymousaggfunc_sql(expression) 1052 1053 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1054 return self.parameterizedagg_sql(expression) 1055 1056 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1057 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1058 1059 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1060 return f"ON CLUSTER {self.sql(expression, 'this')}" 1061 1062 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1063 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1064 exp.Properties.Location.POST_NAME 1065 ): 1066 this_name = self.sql( 1067 expression.this if isinstance(expression.this, exp.Schema) else expression, 1068 "this", 1069 ) 1070 this_properties = " ".join( 1071 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1072 ) 1073 this_schema = self.schema_columns_sql(expression.this) 1074 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1075 1076 return super().createable_sql(expression, locations) 1077 1078 def create_sql(self, expression: exp.Create) -> str: 1079 # The comment property comes last in CTAS statements, i.e. 
after the query 1080 query = expression.expression 1081 if isinstance(query, exp.Query): 1082 comment_prop = expression.find(exp.SchemaCommentProperty) 1083 if comment_prop: 1084 comment_prop.pop() 1085 query.replace(exp.paren(query)) 1086 else: 1087 comment_prop = None 1088 1089 create_sql = super().create_sql(expression) 1090 1091 comment_sql = self.sql(comment_prop) 1092 comment_sql = f" {comment_sql}" if comment_sql else "" 1093 1094 return f"{create_sql}{comment_sql}" 1095 1096 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1097 this = self.indent(self.sql(expression, "this")) 1098 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1099 1100 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1101 this = self.sql(expression, "this") 1102 this = f" {this}" if this else "" 1103 expr = self.sql(expression, "expression") 1104 expr = f" {expr}" if expr else "" 1105 index_type = self.sql(expression, "index_type") 1106 index_type = f" TYPE {index_type}" if index_type else "" 1107 granularity = self.sql(expression, "granularity") 1108 granularity = f" GRANULARITY {granularity}" if granularity else "" 1109 1110 return f"INDEX{this}{expr}{index_type}{granularity}" 1111 1112 def partition_sql(self, expression: exp.Partition) -> str: 1113 return f"PARTITION {self.expressions(expression, flat=True)}" 1114 1115 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1116 return f"ID {self.sql(expression.this)}" 1117 1118 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1119 return ( 1120 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1121 ) 1122 1123 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1124 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
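Because the parser marks every type it sees as non-nullable (ClickHouse's default) and the generator re-wraps genuinely nullable types, transpiling a nullable type from another dialect surfaces the Nullable(...) constructor. A minimal sketch using the top-level sqlglot.transpile helper; the commented output is the expected shape per the datatype_sql logic above:

import sqlglot

# TEXT maps to String, and since the cast did not originate from ClickHouse
# it is treated as nullable, so the generator wraps it in Nullable(...)
print(sqlglot.transpile("SELECT CAST(x AS TEXT)", write="clickhouse")[0])
# SELECT CAST(x AS Nullable(String))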
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
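Relatedly, the ClickHouse parser sets LOG_DEFAULTS_TO_LN = True (see the Parser class below), so a one-argument LOG is read as the natural logarithm. A hedged sketch of the expected behavior:

from sqlglot import parse_one

# With LOG_DEFAULTS_TO_LN, LOG(x) should parse into an exp.Ln node,
# round-tripping as LN(x)
print(parse_one("SELECT LOG(x) FROM t", read="clickhouse").sql())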
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (
  SELECT
    1 AS id,
    2 AS my_id
)
SELECT
  id AS my_id
FROM data
WHERE my_id = 1
GROUP BY my_id
HAVING my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to GROUP BY + HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
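A sketch of how this difference can be observed through the qualifier (assuming sqlglot.optimizer.qualify.qualify, which applies alias expansion according to the dialect):

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

sql = (
    "WITH data AS (SELECT 1 AS id, 2 AS my_id) "
    "SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1"
)

# Under ClickHouse semantics the alias is expanded across the query, so my_id
# in WHERE / GROUP BY / HAVING should resolve to data.id
print(qualify(parse_one(sql, read="clickhouse"), dialect="clickhouse").sql("clickhouse"))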
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (e.g. \n) to its unescaped version (a literal newline).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
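For instance, the ClickHouse dialect is expected to remap the SCHEMA kind on generation (a sketch; the exact output may differ by version):

import sqlglot

# CREATE SCHEMA has no direct ClickHouse equivalent, so the kind is remapped
print(sqlglot.transpile("CREATE SCHEMA foo", write="clickhouse")[0])
# CREATE DATABASE foo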
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }
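A short sketch of the tokenizer in action via the dialect-level tokenize helper (token attribute names assumed from sqlglot.tokens.Token):

from sqlglot.dialects.clickhouse import ClickHouse

# '#' opens a comment and 0x... is tokenized as a hex string in ClickHouse
for token in ClickHouse().tokenize("SELECT 0xFF FROM t  # trailing comment"):
    print(token.token_type, repr(token.text))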
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        AGG_FUNCTIONS = {
            "count", "min", "max", "sum", "avg", "any", "stddevPop", "stddevSamp",
            "varPop", "varSamp", "corr", "covarPop", "covarSamp", "entropy",
            "exponentialMovingAverage", "intervalLengthSum", "kolmogorovSmirnovTest",
            "mannWhitneyUTest", "median", "rankCorr", "sumKahan", "studentTTest",
            "welchTTest", "anyHeavy", "anyLast", "boundingRatio", "first_value",
            "last_value", "argMin", "argMax", "avgWeighted", "topK", "topKWeighted",
            "deltaSum", "deltaSumTimestamp", "groupArray", "groupArrayLast",
            "groupUniqArray", "groupArrayInsertAt", "groupArrayMovingAvg",
            "groupArrayMovingSum", "groupArraySample", "groupBitAnd", "groupBitOr",
            "groupBitXor", "groupBitmap", "groupBitmapAnd", "groupBitmapOr",
            "groupBitmapXor", "sumWithOverflow", "sumMap", "minMap", "maxMap",
            "skewSamp", "skewPop", "kurtSamp", "kurtPop", "uniq", "uniqExact",
            "uniqCombined", "uniqCombined64", "uniqHLL12", "uniqTheta", "quantile",
            "quantiles", "quantileExact", "quantilesExact", "quantileExactLow",
            "quantilesExactLow", "quantileExactHigh", "quantilesExactHigh",
            "quantileExactWeighted", "quantilesExactWeighted", "quantileTiming",
            "quantilesTiming", "quantileTimingWeighted", "quantilesTimingWeighted",
            "quantileDeterministic", "quantilesDeterministic", "quantileTDigest",
            "quantilesTDigest", "quantileTDigestWeighted", "quantilesTDigestWeighted",
            "quantileBFloat16", "quantilesBFloat16", "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted", "simpleLinearRegression",
            "stochasticLinearRegression", "stochasticLogisticRegression",
            "categoricalInformationValue", "contingency", "cramersV",
            "cramersVBiasCorrected", "theilsU", "maxIntersections",
            "maxIntersectionsPosition", "meanZTest", "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted", "quantileGK", "quantilesGK", "sparkBar",
            "sumCount", "largestTriangleThreeBuckets", "histogram", "sequenceMatch",
            "sequenceCount", "windowFunnel", "retention", "uniqUpTo",
            "sequenceNextNode", "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If", "Array", "ArrayIf", "Map", "SimpleState", "State", "Merge",
            "MergeState", "ForEach", "Distinct", "OrDefault", "OrNull", "Resample",
            "ArgMin", "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect ClickHouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType):
                # Mark every type as non-nullable which is ClickHouse's default. This marker
                # helps us transpile types from other dialects to ClickHouse, so that we can
                # e.g. produce `CAST(x AS Nullable(String))` from `CAST(x AS TEXT)`. If there
                # is a `NULL` value in `x`, the former would fail in ClickHouse without the
                # `Nullable` type constructor
                dtype.set("nullable", False)

            return dtype

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                params = self._parse_func_params(func)

                kwargs = {
                    "this": func.this,
                    "expressions": func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()
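A few parses that exercise the ClickHouse-specific machinery above (a sketch; each line relies on behavior documented in the class, not on guaranteed output):

from sqlglot import parse_one

# Scalar CTE: WITH <expression> AS <identifier>
parse_one("WITH 10 AS max_rows SELECT * FROM t LIMIT max_rows", read="clickhouse")

# Parameterized aggregate with a combinator suffix: quantileIf(0.5)(x, cond)
parse_one("SELECT quantileIf(0.5)(x, x > 0) FROM t", read="clickhouse")

# Ternary operator, parsed via the placeholder token (see the COLUMN_OPERATORS note)
parse_one("SELECT x > 0 ? 'pos' : 'neg' FROM t", read="clickhouse")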
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
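These options propagate through the top-level helpers, e.g. (a minimal sketch):

import sqlglot
from sqlglot.errors import ErrorLevel

# Collect parse errors as warnings instead of raising immediately
sqlglot.parse("SELECT 1", read="clickhouse", error_level=ErrorLevel.WARN)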
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
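For example, generator options can be supplied when rendering an expression back to SQL (a minimal sketch):

from sqlglot import parse_one

print(parse_one("SELECT a, b FROM t", read="clickhouse").sql(dialect="clickhouse", pretty=True))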
929 def strtodate_sql(self, expression: exp.StrToDate) -> str: 930 strtodate_sql = self.function_fallback_sql(expression) 931 932 if not isinstance(expression.parent, exp.Cast): 933 # StrToDate returns DATEs in other dialects (eg. postgres), so 934 # this branch aims to improve the transpilation to clickhouse 935 return f"CAST({strtodate_sql} AS DATE)" 936 937 return strtodate_sql
939 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 940 this = expression.this 941 942 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 943 return self.sql(this) 944 945 return super().cast_sql(expression, safe_prefix=safe_prefix)
947 def trycast_sql(self, expression: exp.TryCast) -> str: 948 dtype = expression.to 949 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 950 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 951 dtype.set("nullable", True) 952 953 return super().cast_sql(expression)
990 def datatype_sql(self, expression: exp.DataType) -> str: 991 # String is the standard ClickHouse type, every other variant is just an alias. 992 # Additionally, any supplied length parameter will be ignored. 993 # 994 # https://clickhouse.com/docs/en/sql-reference/data-types/string 995 if expression.this in self.STRING_TYPE_MAPPING: 996 dtype = "String" 997 else: 998 dtype = super().datatype_sql(expression) 999 1000 # This section changes the type to `Nullable(...)` if the following conditions hold: 1001 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1002 # and change their semantics 1003 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1004 # constraint: "Type of Map key must be a type, that can be represented by integer or 1005 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1006 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1007 parent = expression.parent 1008 if ( 1009 expression.args.get("nullable") is not False 1010 and not ( 1011 isinstance(parent, exp.DataType) 1012 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1013 and expression.index in (None, 0) 1014 ) 1015 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1016 ): 1017 dtype = f"Nullable({dtype})" 1018 1019 return dtype
1029 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1030 return super().after_limit_modifiers(expression) + [ 1031 ( 1032 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1033 if expression.args.get("settings") 1034 else "" 1035 ), 1036 ( 1037 self.seg("FORMAT ") + self.sql(expression, "format") 1038 if expression.args.get("format") 1039 else "" 1040 ), 1041 ]
1062 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1063 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1064 exp.Properties.Location.POST_NAME 1065 ): 1066 this_name = self.sql( 1067 expression.this if isinstance(expression.this, exp.Schema) else expression, 1068 "this", 1069 ) 1070 this_properties = " ".join( 1071 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1072 ) 1073 this_schema = self.schema_columns_sql(expression.this) 1074 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1075 1076 return super().createable_sql(expression, locations)
1078 def create_sql(self, expression: exp.Create) -> str: 1079 # The comment property comes last in CTAS statements, i.e. after the query 1080 query = expression.expression 1081 if isinstance(query, exp.Query): 1082 comment_prop = expression.find(exp.SchemaCommentProperty) 1083 if comment_prop: 1084 comment_prop.pop() 1085 query.replace(exp.paren(query)) 1086 else: 1087 comment_prop = None 1088 1089 create_sql = super().create_sql(expression) 1090 1091 comment_sql = self.sql(comment_prop) 1092 comment_sql = f" {comment_sql}" if comment_sql else "" 1093 1094 return f"{create_sql}{comment_sql}"
```python
def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
    this = self.sql(expression, "this")
    this = f" {this}" if this else ""
    expr = self.sql(expression, "expression")
    expr = f" {expr}" if expr else ""
    index_type = self.sql(expression, "index_type")
    index_type = f" TYPE {index_type}" if index_type else ""
    granularity = self.sql(expression, "granularity")
    granularity = f" GRANULARITY {granularity}" if granularity else ""

    return f"INDEX{this}{expr}{index_type}{granularity}"
```
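This renders ClickHouse data-skipping indexes declared inside a CREATE TABLE column list. A round-trip sketch (the index name, expression, and type are illustrative):

```python
import sqlglot

# The INDEX entry becomes an IndexColumnConstraint and is re-emitted as
# INDEX <name> <expr> TYPE <type> GRANULARITY <n>.
sql = """
CREATE TABLE t (
    s String,
    INDEX idx s TYPE minmax GRANULARITY 1
) ENGINE=MergeTree ORDER BY tuple()
"""
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])
```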
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_NULLABLE_TYPES
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql