sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t
import datetime

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql


def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14'
        # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if
        # it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))
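# A minimal illustration (not part of the module) of why _timestrtotime_sql pads
# fractional seconds: in Python < 3.11, datetime.datetime.fromisoformat() accepts
# only 3- or 6-digit fractional parts, so e.g. a 2-digit literal must be padded
# to 6 digits before parsing:
#
#     import datetime
#     datetime.datetime.fromisoformat("2020-01-01 12:13:14.120000-08:00")  # ok
#     # -> datetime.datetime(2020, 1, 1, 12, 13, 14, 120000, tzinfo=...)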
class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        IDENTIFIER_ESCAPES = ["\\"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }
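    # A minimal usage sketch (not part of the module): NORMALIZATION_STRATEGY is
    # CASE_SENSITIVE, so identifier casing survives a ClickHouse round trip:
    #
    #     import sqlglot
    #     sqlglot.parse_one('SELECT "Foo" FROM "Bar"', read="clickhouse").sql("clickhouse")
    #     # 'SELECT "Foo" FROM "Bar"'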
    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }
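        # A minimal usage sketch (not part of the module): _build_count_if keeps the
        # one-argument form as exp.CountIf, while extra arguments stay combined:
        #
        #     import sqlglot
        #     from sqlglot import exp
        #     ast = sqlglot.parse_one("SELECT countIf(x > 0) FROM t", read="clickhouse")
        #     assert isinstance(ast.selects[0], exp.CountIf)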
        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)
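        # Illustrative lookups (not part of the module), derived directly from the
        # comprehension above, which splits a combinator suffix off an aggregate name:
        #
        #     AGG_FUNC_MAPPING["sumIf"]     # -> ("sum", "If")
        #     AGG_FUNC_MAPPING["uniq"]      # -> ("uniq", "")
        #     "sumFoo" in AGG_FUNC_MAPPING  # -> False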
        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
            "MEDIAN": lambda self: self._parse_quantile(),
            "COLUMNS": lambda self: self._parse_columns(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }
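        # A minimal usage sketch (not part of the module): `{name: Type}` query
        # parameters parse into exp.Placeholder (via _parse_query_parameter below)
        # and are re-rendered by Generator.placeholder_sql:
        #
        #     import sqlglot
        #     sqlglot.parse_one("SELECT {abc: UInt32}", read="clickhouse").sql("clickhouse")
        #     # 'SELECT {abc: UInt32}'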
        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            return self._parse_lambda()

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this
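        # A minimal usage sketch (not part of the module): popping PLACEHOLDER from
        # COLUMN_OPERATORS above is what lets _parse_assignment treat `?` as the
        # start of ClickHouse's ternary operator:
        #
        #     import sqlglot
        #     sqlglot.parse_one("SELECT x > 0 ? 'pos' : 'neg' FROM t", read="clickhouse")
        #     # the projection is an exp.If(this=x > 0, true='pos', false='neg')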
        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte
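        # A minimal usage sketch (not part of the module): both CTE forms parse,
        # with the scalar form flagged so Generator.cte_sql can render it back as-is:
        #
        #     import sqlglot
        #     sqlglot.parse_one("WITH 1 + 1 AS two SELECT two", read="clickhouse").sql("clickhouse")
        #     # 'WITH 1 + 1 AS two SELECT two'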
        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))
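        # A minimal usage sketch (not part of the module): parameterized aggregates
        # put the parameter list before the argument list, and combinator suffixes
        # are recognized via AGG_FUNC_MAPPING:
        #
        #     import sqlglot
        #     sqlglot.parse_one("SELECT quantile(0.5)(x) FROM t", read="clickhouse")
        #     # projection is exp.Quantile(this=x, quantile=0.5)
        #     sqlglot.parse_one("SELECT uniqIf(x, x > 0) FROM t", read="clickhouse")
        #     # projection is an exp.CombinedAggFunc with parts=("uniq", "If")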
        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

        def _parse_alias(
            self, this: t.Optional[exp.Expression], explicit: bool = False
        ) -> t.Optional[exp.Expression]:
            # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
            # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
            if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
                return this

            return super()._parse_alias(this=this, explicit=explicit)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            this = super()._parse_expression()

            # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]" modifier
            while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
                self._match(TokenType.R_PAREN)

            return this

        def _parse_columns(self) -> exp.Expression:
            this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

            while self._next and self._match_text_seq(")", "APPLY", "("):
                self._match(TokenType.R_PAREN)
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            return this

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        VALUES_AS_TABLE = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BOOLEAN: "Bool",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DECIMAL: "Decimal",
            exp.DataType.Type.DECIMAL32: "Decimal32",
            exp.DataType.Type.DECIMAL64: "Decimal64",
            exp.DataType.Type.DECIMAL128: "Decimal128",
            exp.DataType.Type.DECIMAL256: "Decimal256",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.POINT: "Point",
            exp.DataType.Type.RING: "Ring",
            exp.DataType.Type.LINESTRING: "LineString",
            exp.DataType.Type.MULTILINESTRING: "MultiLineString",
            exp.DataType.Type.POLYGON: "Polygon",
            exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySize: rename_func("LENGTH"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
            ),
            exp.TimeStrToTime: _timestrtotime_sql,
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Trim: trim_sql,
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
            exp.Chr: rename_func("CHAR"),
            exp.Lag: lambda self, e: self.func(
                "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Lead: lambda self, e: self.func(
                "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.ToTableProperty: exp.Properties.Location.POST_NAME,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # There's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }
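        # A minimal transpilation sketch (not part of the module): the nullable
        # marker set in Parser._parse_types combines with datatype_sql below, so a
        # TEXT cast from another dialect gains an explicit Nullable wrapper (this
        # is the example given in the _parse_types comment):
        #
        #     import sqlglot
        #     sqlglot.transpile("SELECT CAST(x AS TEXT)", write="clickhouse")[0]
        #     # 'SELECT CAST(x AS Nullable(String))'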
        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (eg. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return f"CAST({strtodate_sql} AS DATE)"

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{this_properties}{this_schema}"

            return super().createable_sql(expression, locations)

        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

        def is_sql(self, expression: exp.Is) -> str:
            is_sql = super().is_sql(expression)

            if isinstance(expression.parent, exp.Not):
                # value IS NOT NULL -> NOT (value IS NULL)
                is_sql = self.wrap(is_sql)

            return is_sql
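# A minimal round-trip sketch (not part of the module): the query-level SETTINGS
# and FORMAT modifiers handled by the Parser and by after_limit_modifiers above
# should survive ClickHouse-to-ClickHouse transpilation, e.g.
#
#     import sqlglot
#     sqlglot.transpile(
#         "SELECT * FROM t LIMIT 1 SETTINGS max_threads = 4 FORMAT JSONEachRow",
#         read="clickhouse",
#         write="clickhouse",
#     )[0]
#     # expected: 'SELECT * FROM t LIMIT 1 SETTINGS max_threads = 4 FORMAT JSONEachRow'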
"UInt8", 912 exp.DataType.Type.IPV4: "IPv4", 913 exp.DataType.Type.IPV6: "IPv6", 914 exp.DataType.Type.POINT: "Point", 915 exp.DataType.Type.RING: "Ring", 916 exp.DataType.Type.LINESTRING: "LineString", 917 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 918 exp.DataType.Type.POLYGON: "Polygon", 919 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 920 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 921 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 922 } 923 924 TRANSFORMS = { 925 **generator.Generator.TRANSFORMS, 926 exp.AnyValue: rename_func("any"), 927 exp.ApproxDistinct: rename_func("uniq"), 928 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 929 exp.ArraySize: rename_func("LENGTH"), 930 exp.ArraySum: rename_func("arraySum"), 931 exp.ArgMax: arg_max_or_min_no_count("argMax"), 932 exp.ArgMin: arg_max_or_min_no_count("argMin"), 933 exp.Array: inline_array_sql, 934 exp.CastToStrType: rename_func("CAST"), 935 exp.CountIf: rename_func("countIf"), 936 exp.CompressColumnConstraint: lambda self, 937 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 938 exp.ComputedColumnConstraint: lambda self, 939 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 940 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 941 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 942 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 943 exp.DateStrToDate: rename_func("toDate"), 944 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 945 exp.Explode: rename_func("arrayJoin"), 946 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 947 exp.IsNan: rename_func("isNaN"), 948 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 949 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 950 exp.JSONPathKey: json_path_key_only_name, 951 exp.JSONPathRoot: lambda *_: "", 952 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 953 exp.Nullif: rename_func("nullIf"), 954 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 955 exp.Pivot: no_pivot_sql, 956 exp.Quantile: _quantile_sql, 957 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 958 exp.Rand: rename_func("randCanonical"), 959 exp.StartsWith: rename_func("startsWith"), 960 exp.StrPosition: lambda self, e: self.func( 961 "position", e.this, e.args.get("substr"), e.args.get("position") 962 ), 963 exp.TimeToStr: lambda self, e: self.func( 964 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 965 ), 966 exp.TimeStrToTime: _timestrtotime_sql, 967 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 968 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 969 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 970 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 971 exp.MD5Digest: rename_func("MD5"), 972 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 973 exp.SHA: rename_func("SHA1"), 974 exp.SHA2: sha256_sql, 975 exp.UnixToTime: _unix_to_time_sql, 976 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 977 exp.Trim: trim_sql, 978 exp.Variance: rename_func("varSamp"), 979 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 980 exp.Stddev: rename_func("stddevSamp"), 981 exp.Chr: rename_func("CHAR"), 982 exp.Lag: lambda self, e: self.func( 983 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 984 ), 985 exp.Lead: 
lambda self, e: self.func( 986 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 987 ), 988 } 989 990 PROPERTIES_LOCATION = { 991 **generator.Generator.PROPERTIES_LOCATION, 992 exp.OnCluster: exp.Properties.Location.POST_NAME, 993 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 994 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 995 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 996 } 997 998 # There's no list in docs, but it can be found in Clickhouse code 999 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1000 ON_CLUSTER_TARGETS = { 1001 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1002 "DATABASE", 1003 "TABLE", 1004 "VIEW", 1005 "DICTIONARY", 1006 "INDEX", 1007 "FUNCTION", 1008 "NAMED COLLECTION", 1009 } 1010 1011 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1012 NON_NULLABLE_TYPES = { 1013 exp.DataType.Type.ARRAY, 1014 exp.DataType.Type.MAP, 1015 exp.DataType.Type.STRUCT, 1016 exp.DataType.Type.POINT, 1017 exp.DataType.Type.RING, 1018 exp.DataType.Type.LINESTRING, 1019 exp.DataType.Type.MULTILINESTRING, 1020 exp.DataType.Type.POLYGON, 1021 exp.DataType.Type.MULTIPOLYGON, 1022 } 1023 1024 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1025 strtodate_sql = self.function_fallback_sql(expression) 1026 1027 if not isinstance(expression.parent, exp.Cast): 1028 # StrToDate returns DATEs in other dialects (eg. postgres), so 1029 # this branch aims to improve the transpilation to clickhouse 1030 return f"CAST({strtodate_sql} AS DATE)" 1031 1032 return strtodate_sql 1033 1034 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1035 this = expression.this 1036 1037 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1038 return self.sql(this) 1039 1040 return super().cast_sql(expression, safe_prefix=safe_prefix) 1041 1042 def trycast_sql(self, expression: exp.TryCast) -> str: 1043 dtype = expression.to 1044 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1045 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1046 dtype.set("nullable", True) 1047 1048 return super().cast_sql(expression) 1049 1050 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1051 this = self.json_path_part(expression.this) 1052 return str(int(this) + 1) if is_int(this) else this 1053 1054 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1055 return f"AS {self.sql(expression, 'this')}" 1056 1057 def _any_to_has( 1058 self, 1059 expression: exp.EQ | exp.NEQ, 1060 default: t.Callable[[t.Any], str], 1061 prefix: str = "", 1062 ) -> str: 1063 if isinstance(expression.left, exp.Any): 1064 arr = expression.left 1065 this = expression.right 1066 elif isinstance(expression.right, exp.Any): 1067 arr = expression.right 1068 this = expression.left 1069 else: 1070 return default(expression) 1071 1072 return prefix + self.func("has", arr.this.unnest(), this) 1073 1074 def eq_sql(self, expression: exp.EQ) -> str: 1075 return self._any_to_has(expression, super().eq_sql) 1076 1077 def neq_sql(self, expression: exp.NEQ) -> str: 1078 return self._any_to_has(expression, super().neq_sql, "NOT ") 1079 1080 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1081 # Manually add a flag to make the search case-insensitive 1082 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1083 return self.func("match", expression.this, regex) 1084 1085 def datatype_sql(self, 
expression: exp.DataType) -> str: 1086 # String is the standard ClickHouse type, every other variant is just an alias. 1087 # Additionally, any supplied length parameter will be ignored. 1088 # 1089 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1090 if expression.this in self.STRING_TYPE_MAPPING: 1091 dtype = "String" 1092 else: 1093 dtype = super().datatype_sql(expression) 1094 1095 # This section changes the type to `Nullable(...)` if the following conditions hold: 1096 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1097 # and change their semantics 1098 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1099 # constraint: "Type of Map key must be a type, that can be represented by integer or 1100 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1101 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1102 parent = expression.parent 1103 nullable = expression.args.get("nullable") 1104 if nullable is True or ( 1105 nullable is None 1106 and not ( 1107 isinstance(parent, exp.DataType) 1108 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1109 and expression.index in (None, 0) 1110 ) 1111 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1112 ): 1113 dtype = f"Nullable({dtype})" 1114 1115 return dtype 1116 1117 def cte_sql(self, expression: exp.CTE) -> str: 1118 if expression.args.get("scalar"): 1119 this = self.sql(expression, "this") 1120 alias = self.sql(expression, "alias") 1121 return f"{this} AS {alias}" 1122 1123 return super().cte_sql(expression) 1124 1125 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1126 return super().after_limit_modifiers(expression) + [ 1127 ( 1128 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1129 if expression.args.get("settings") 1130 else "" 1131 ), 1132 ( 1133 self.seg("FORMAT ") + self.sql(expression, "format") 1134 if expression.args.get("format") 1135 else "" 1136 ), 1137 ] 1138 1139 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1140 params = self.expressions(expression, key="params", flat=True) 1141 return self.func(expression.name, *expression.expressions) + f"({params})" 1142 1143 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1144 return self.func(expression.name, *expression.expressions) 1145 1146 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1147 return self.anonymousaggfunc_sql(expression) 1148 1149 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1150 return self.parameterizedagg_sql(expression) 1151 1152 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1153 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1154 1155 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1156 return f"ON CLUSTER {self.sql(expression, 'this')}" 1157 1158 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1159 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1160 exp.Properties.Location.POST_NAME 1161 ): 1162 this_name = self.sql( 1163 expression.this if isinstance(expression.this, exp.Schema) else expression, 1164 "this", 1165 ) 1166 this_properties = " ".join( 1167 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1168 ) 1169 this_schema = self.schema_columns_sql(expression.this) 1170 this_schema = 
f"{self.sep()}{this_schema}" if this_schema else "" 1171 1172 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1173 1174 return super().createable_sql(expression, locations) 1175 1176 def create_sql(self, expression: exp.Create) -> str: 1177 # The comment property comes last in CTAS statements, i.e. after the query 1178 query = expression.expression 1179 if isinstance(query, exp.Query): 1180 comment_prop = expression.find(exp.SchemaCommentProperty) 1181 if comment_prop: 1182 comment_prop.pop() 1183 query.replace(exp.paren(query)) 1184 else: 1185 comment_prop = None 1186 1187 create_sql = super().create_sql(expression) 1188 1189 comment_sql = self.sql(comment_prop) 1190 comment_sql = f" {comment_sql}" if comment_sql else "" 1191 1192 return f"{create_sql}{comment_sql}" 1193 1194 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1195 this = self.indent(self.sql(expression, "this")) 1196 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1197 1198 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1199 this = self.sql(expression, "this") 1200 this = f" {this}" if this else "" 1201 expr = self.sql(expression, "expression") 1202 expr = f" {expr}" if expr else "" 1203 index_type = self.sql(expression, "index_type") 1204 index_type = f" TYPE {index_type}" if index_type else "" 1205 granularity = self.sql(expression, "granularity") 1206 granularity = f" GRANULARITY {granularity}" if granularity else "" 1207 1208 return f"INDEX{this}{expr}{index_type}{granularity}" 1209 1210 def partition_sql(self, expression: exp.Partition) -> str: 1211 return f"PARTITION {self.expressions(expression, flat=True)}" 1212 1213 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1214 return f"ID {self.sql(expression.this)}" 1215 1216 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1217 return ( 1218 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1219 ) 1220 1221 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1222 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1223 1224 def is_sql(self, expression: exp.Is) -> str: 1225 is_sql = super().is_sql(expression) 1226 1227 if isinstance(expression.parent, exp.Not): 1228 # value IS NOT NULL -> NOT (value IS NULL) 1229 is_sql = self.wrap(is_sql) 1230 1231 return is_sql
NORMALIZE_FUNCTIONS
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
NULL_ORDERING
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
LOG_BASE_FIRST
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
FORCE_EARLY_ALIAS_REF_EXPANSION
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (
  SELECT 1 AS id, 2 AS my_id
)
SELECT
  id AS my_id
FROM data
WHERE my_id = 1
GROUP BY my_id
HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1" (a runnable sketch follows this list)
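A sketch of the ClickHouse behavior, assuming sqlglot's qualify pass from sqlglot.optimizer.qualify (the resolution described in the comment is an expectation based on the docstring above, not a captured result):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1"

# With early alias expansion, my_id in the WHERE clause should resolve to the
# aliased expression (data.id) rather than to data.my_id.
ast = sqlglot.parse_one(sql, read="clickhouse")
print(qualify(ast, dialect="clickhouse").sql("clickhouse"))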
NORMALIZATION_STRATEGY
Specifies the strategy according to which identifiers should be normalized.
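Since the strategy is case sensitive for ClickHouse (see the class body above), the inherited normalize_identifier helper should leave identifier casing untouched. A small sketch; the no-op behavior is an assumption drawn from that strategy:

from sqlglot import exp
from sqlglot.dialects.clickhouse import ClickHouse

# Under a case-sensitive strategy, no lower/upper casing should be applied.
ident = ClickHouse().normalize_identifier(exp.to_identifier("MyColumn"))
print(ident.name)  # expected: MyColumn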
UNESCAPED_SEQUENCES
Mapping of an escaped sequence (e.g. "\n") to its unescaped version (the corresponding literal character).
CREATABLE_KIND_MAPPING
Helper for dialects that use a different name for the same creatable kind. For example, the ClickHouse equivalent of CREATE SCHEMA is CREATE DATABASE.
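A hedged one-liner showing the mapping in action (the CREATE DATABASE output is an expectation based on the mapping, not a captured result):

import sqlglot

# The generic SCHEMA kind should be emitted as DATABASE for ClickHouse.
print(sqlglot.transpile("CREATE SCHEMA foo", read="postgres", write="clickhouse")[0])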
SET_OP_DISTINCT_BY_DEFAULT
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
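A sketch of the consequence for transpilation (the UNION DISTINCT spelling is an expectation based on the mapping above, not a captured result):

import sqlglot

# Postgres UNION is implicitly DISTINCT; since ClickHouse maps exp.Union to
# None above, the distinctness must be spelled out when generating.
print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="postgres", write="clickhouse")[0])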
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
184 class Tokenizer(tokens.Tokenizer): 185 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 186 IDENTIFIERS = ['"', "`"] 187 IDENTIFIER_ESCAPES = ["\\"] 188 STRING_ESCAPES = ["'", "\\"] 189 BIT_STRINGS = [("0b", "")] 190 HEX_STRINGS = [("0x", ""), ("0X", "")] 191 HEREDOC_STRINGS = ["$"] 192 193 KEYWORDS = { 194 **tokens.Tokenizer.KEYWORDS, 195 "ATTACH": TokenType.COMMAND, 196 "DATE32": TokenType.DATE32, 197 "DATETIME64": TokenType.DATETIME64, 198 "DICTIONARY": TokenType.DICTIONARY, 199 "ENUM8": TokenType.ENUM8, 200 "ENUM16": TokenType.ENUM16, 201 "FINAL": TokenType.FINAL, 202 "FIXEDSTRING": TokenType.FIXEDSTRING, 203 "FLOAT32": TokenType.FLOAT, 204 "FLOAT64": TokenType.DOUBLE, 205 "GLOBAL": TokenType.GLOBAL, 206 "INT256": TokenType.INT256, 207 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 208 "MAP": TokenType.MAP, 209 "NESTED": TokenType.NESTED, 210 "SAMPLE": TokenType.TABLE_SAMPLE, 211 "TUPLE": TokenType.STRUCT, 212 "UINT128": TokenType.UINT128, 213 "UINT16": TokenType.USMALLINT, 214 "UINT256": TokenType.UINT256, 215 "UINT32": TokenType.UINT, 216 "UINT64": TokenType.UBIGINT, 217 "UINT8": TokenType.UTINYINT, 218 "IPV4": TokenType.IPV4, 219 "IPV6": TokenType.IPV6, 220 "POINT": TokenType.POINT, 221 "RING": TokenType.RING, 222 "LINESTRING": TokenType.LINESTRING, 223 "MULTILINESTRING": TokenType.MULTILINESTRING, 224 "POLYGON": TokenType.POLYGON, 225 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 226 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 227 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 228 "SYSTEM": TokenType.COMMAND, 229 "PREWHERE": TokenType.PREWHERE, 230 } 231 KEYWORDS.pop("/*+") 232 233 SINGLE_TOKENS = { 234 **tokens.Tokenizer.SINGLE_TOKENS, 235 "$": TokenType.HEREDOC_STRING, 236 }
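A small sketch exercising the tokenizer settings above, assuming sqlglot's top-level tokenize helper (the token details in the comment are expectations based on the class body, not captured output):

import sqlglot

# Per the settings above, 0x1F should be lexed as a hex string and "#!"
# should open a line comment, so the comment text never reaches the parser.
for token in sqlglot.tokenize("SELECT 0x1F #! a ClickHouse-style comment", read="clickhouse"):
    print(token)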
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
238 class Parser(parser.Parser): 239 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 240 # * select x from t1 union all select x from t2 limit 1; 241 # * select x from t1 union all (select x from t2 limit 1); 242 MODIFIERS_ATTACHED_TO_SET_OP = False 243 INTERVAL_SPANS = False 244 245 FUNCTIONS = { 246 **parser.Parser.FUNCTIONS, 247 "ANY": exp.AnyValue.from_arg_list, 248 "ARRAYSUM": exp.ArraySum.from_arg_list, 249 "COUNTIF": _build_count_if, 250 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 251 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 252 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 253 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 254 "DATE_FORMAT": _build_date_format, 255 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 256 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 257 "FORMATDATETIME": _build_date_format, 258 "JSONEXTRACTSTRING": build_json_extract_path( 259 exp.JSONExtractScalar, zero_based_indexing=False 260 ), 261 "MAP": parser.build_var_map, 262 "MATCH": exp.RegexpLike.from_arg_list, 263 "RANDCANONICAL": exp.Rand.from_arg_list, 264 "STR_TO_DATE": _build_str_to_date, 265 "TUPLE": exp.Struct.from_arg_list, 266 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 267 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 268 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 269 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 270 "UNIQ": exp.ApproxDistinct.from_arg_list, 271 "XOR": lambda args: exp.Xor(expressions=args), 272 "MD5": exp.MD5Digest.from_arg_list, 273 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 274 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 275 } 276 277 AGG_FUNCTIONS = { 278 "count", 279 "min", 280 "max", 281 "sum", 282 "avg", 283 "any", 284 "stddevPop", 285 "stddevSamp", 286 "varPop", 287 "varSamp", 288 "corr", 289 "covarPop", 290 "covarSamp", 291 "entropy", 292 "exponentialMovingAverage", 293 "intervalLengthSum", 294 "kolmogorovSmirnovTest", 295 "mannWhitneyUTest", 296 "median", 297 "rankCorr", 298 "sumKahan", 299 "studentTTest", 300 "welchTTest", 301 "anyHeavy", 302 "anyLast", 303 "boundingRatio", 304 "first_value", 305 "last_value", 306 "argMin", 307 "argMax", 308 "avgWeighted", 309 "topK", 310 "topKWeighted", 311 "deltaSum", 312 "deltaSumTimestamp", 313 "groupArray", 314 "groupArrayLast", 315 "groupUniqArray", 316 "groupArrayInsertAt", 317 "groupArrayMovingAvg", 318 "groupArrayMovingSum", 319 "groupArraySample", 320 "groupBitAnd", 321 "groupBitOr", 322 "groupBitXor", 323 "groupBitmap", 324 "groupBitmapAnd", 325 "groupBitmapOr", 326 "groupBitmapXor", 327 "sumWithOverflow", 328 "sumMap", 329 "minMap", 330 "maxMap", 331 "skewSamp", 332 "skewPop", 333 "kurtSamp", 334 "kurtPop", 335 "uniq", 336 "uniqExact", 337 "uniqCombined", 338 "uniqCombined64", 339 "uniqHLL12", 340 "uniqTheta", 341 "quantile", 342 "quantiles", 343 "quantileExact", 344 "quantilesExact", 345 "quantileExactLow", 346 "quantilesExactLow", 347 "quantileExactHigh", 348 "quantilesExactHigh", 349 "quantileExactWeighted", 350 "quantilesExactWeighted", 351 "quantileTiming", 352 "quantilesTiming", 353 "quantileTimingWeighted", 354 "quantilesTimingWeighted", 355 "quantileDeterministic", 356 "quantilesDeterministic", 357 "quantileTDigest", 358 "quantilesTDigest", 359 "quantileTDigestWeighted", 360 
"quantilesTDigestWeighted", 361 "quantileBFloat16", 362 "quantilesBFloat16", 363 "quantileBFloat16Weighted", 364 "quantilesBFloat16Weighted", 365 "simpleLinearRegression", 366 "stochasticLinearRegression", 367 "stochasticLogisticRegression", 368 "categoricalInformationValue", 369 "contingency", 370 "cramersV", 371 "cramersVBiasCorrected", 372 "theilsU", 373 "maxIntersections", 374 "maxIntersectionsPosition", 375 "meanZTest", 376 "quantileInterpolatedWeighted", 377 "quantilesInterpolatedWeighted", 378 "quantileGK", 379 "quantilesGK", 380 "sparkBar", 381 "sumCount", 382 "largestTriangleThreeBuckets", 383 "histogram", 384 "sequenceMatch", 385 "sequenceCount", 386 "windowFunnel", 387 "retention", 388 "uniqUpTo", 389 "sequenceNextNode", 390 "exponentialTimeDecayedAvg", 391 } 392 393 AGG_FUNCTIONS_SUFFIXES = [ 394 "If", 395 "Array", 396 "ArrayIf", 397 "Map", 398 "SimpleState", 399 "State", 400 "Merge", 401 "MergeState", 402 "ForEach", 403 "Distinct", 404 "OrDefault", 405 "OrNull", 406 "Resample", 407 "ArgMin", 408 "ArgMax", 409 ] 410 411 FUNC_TOKENS = { 412 *parser.Parser.FUNC_TOKENS, 413 TokenType.SET, 414 } 415 416 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 417 418 ID_VAR_TOKENS = { 419 *parser.Parser.ID_VAR_TOKENS, 420 TokenType.LIKE, 421 } 422 423 AGG_FUNC_MAPPING = ( 424 lambda functions, suffixes: { 425 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 426 } 427 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 428 429 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 430 431 FUNCTION_PARSERS = { 432 **parser.Parser.FUNCTION_PARSERS, 433 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 434 "QUANTILE": lambda self: self._parse_quantile(), 435 "MEDIAN": lambda self: self._parse_quantile(), 436 "COLUMNS": lambda self: self._parse_columns(), 437 } 438 439 FUNCTION_PARSERS.pop("MATCH") 440 441 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 442 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 443 444 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 445 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 446 447 RANGE_PARSERS = { 448 **parser.Parser.RANGE_PARSERS, 449 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 450 and self._parse_in(this, is_global=True), 451 } 452 453 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 454 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
455 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 456 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 457 458 JOIN_KINDS = { 459 *parser.Parser.JOIN_KINDS, 460 TokenType.ANY, 461 TokenType.ASOF, 462 TokenType.ARRAY, 463 } 464 465 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 466 TokenType.ANY, 467 TokenType.ARRAY, 468 TokenType.FINAL, 469 TokenType.FORMAT, 470 TokenType.SETTINGS, 471 } 472 473 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 474 TokenType.FORMAT, 475 } 476 477 LOG_DEFAULTS_TO_LN = True 478 479 QUERY_MODIFIER_PARSERS = { 480 **parser.Parser.QUERY_MODIFIER_PARSERS, 481 TokenType.SETTINGS: lambda self: ( 482 "settings", 483 self._advance() or self._parse_csv(self._parse_assignment), 484 ), 485 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 486 } 487 488 CONSTRAINT_PARSERS = { 489 **parser.Parser.CONSTRAINT_PARSERS, 490 "INDEX": lambda self: self._parse_index_constraint(), 491 "CODEC": lambda self: self._parse_compress(), 492 } 493 494 ALTER_PARSERS = { 495 **parser.Parser.ALTER_PARSERS, 496 "REPLACE": lambda self: self._parse_alter_table_replace(), 497 } 498 499 SCHEMA_UNNAMED_CONSTRAINTS = { 500 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 501 "INDEX", 502 } 503 504 PLACEHOLDER_PARSERS = { 505 **parser.Parser.PLACEHOLDER_PARSERS, 506 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 507 } 508 509 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 510 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 511 return self._parse_lambda() 512 513 def _parse_types( 514 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 515 ) -> t.Optional[exp.Expression]: 516 dtype = super()._parse_types( 517 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 518 ) 519 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 520 # Mark every type as non-nullable which is ClickHouse's default, unless it's 521 # already marked as nullable. This marker helps us transpile types from other 522 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 523 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 524 # fail in ClickHouse without the `Nullable` type constructor. 525 dtype.set("nullable", False) 526 527 return dtype 528 529 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 530 index = self._index 531 this = self._parse_bitwise() 532 if self._match(TokenType.FROM): 533 self._retreat(index) 534 return super()._parse_extract() 535 536 # We return Anonymous here because extract and regexpExtract have different semantics, 537 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 538 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 539 # 540 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
541 self._match(TokenType.COMMA) 542 return self.expression( 543 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 544 ) 545 546 def _parse_assignment(self) -> t.Optional[exp.Expression]: 547 this = super()._parse_assignment() 548 549 if self._match(TokenType.PLACEHOLDER): 550 return self.expression( 551 exp.If, 552 this=this, 553 true=self._parse_assignment(), 554 false=self._match(TokenType.COLON) and self._parse_assignment(), 555 ) 556 557 return this 558 559 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 560 """ 561 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 562 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 563 """ 564 this = self._parse_id_var() 565 self._match(TokenType.COLON) 566 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 567 self._match_text_seq("IDENTIFIER") and "Identifier" 568 ) 569 570 if not kind: 571 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 572 elif not self._match(TokenType.R_BRACE): 573 self.raise_error("Expecting }") 574 575 return self.expression(exp.Placeholder, this=this, kind=kind) 576 577 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 578 this = super()._parse_in(this) 579 this.set("is_global", is_global) 580 return this 581 582 def _parse_table( 583 self, 584 schema: bool = False, 585 joins: bool = False, 586 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 587 parse_bracket: bool = False, 588 is_db_reference: bool = False, 589 parse_partition: bool = False, 590 ) -> t.Optional[exp.Expression]: 591 this = super()._parse_table( 592 schema=schema, 593 joins=joins, 594 alias_tokens=alias_tokens, 595 parse_bracket=parse_bracket, 596 is_db_reference=is_db_reference, 597 ) 598 599 if self._match(TokenType.FINAL): 600 this = self.expression(exp.Final, this=this) 601 602 return this 603 604 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 605 return super()._parse_position(haystack_first=True) 606 607 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 608 def _parse_cte(self) -> exp.CTE: 609 # WITH <identifier> AS <subquery expression> 610 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 611 612 if not cte: 613 # WITH <expression> AS <identifier> 614 cte = self.expression( 615 exp.CTE, 616 this=self._parse_assignment(), 617 alias=self._parse_table_alias(), 618 scalar=True, 619 ) 620 621 return cte 622 623 def _parse_join_parts( 624 self, 625 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 626 is_global = self._match(TokenType.GLOBAL) and self._prev 627 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 628 629 if kind_pre: 630 kind = self._match_set(self.JOIN_KINDS) and self._prev 631 side = self._match_set(self.JOIN_SIDES) and self._prev 632 return is_global, side, kind 633 634 return ( 635 is_global, 636 self._match_set(self.JOIN_SIDES) and self._prev, 637 self._match_set(self.JOIN_KINDS) and self._prev, 638 ) 639 640 def _parse_join( 641 self, skip_join_token: bool = False, parse_bracket: bool = False 642 ) -> t.Optional[exp.Join]: 643 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 644 if join: 645 join.set("global", join.args.pop("method", None)) 646 647 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 648 # 
https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 649 if join.kind == "ARRAY": 650 for table in join.find_all(exp.Table): 651 table.replace(table.to_column()) 652 653 return join 654 655 def _parse_function( 656 self, 657 functions: t.Optional[t.Dict[str, t.Callable]] = None, 658 anonymous: bool = False, 659 optional_parens: bool = True, 660 any_token: bool = False, 661 ) -> t.Optional[exp.Expression]: 662 expr = super()._parse_function( 663 functions=functions, 664 anonymous=anonymous, 665 optional_parens=optional_parens, 666 any_token=any_token, 667 ) 668 669 func = expr.this if isinstance(expr, exp.Window) else expr 670 671 # Aggregate functions can be split in 2 parts: <func_name><suffix> 672 parts = ( 673 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 674 ) 675 676 if parts: 677 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 678 params = self._parse_func_params(anon_func) 679 680 kwargs = { 681 "this": anon_func.this, 682 "expressions": anon_func.expressions, 683 } 684 if parts[1]: 685 kwargs["parts"] = parts 686 exp_class: t.Type[exp.Expression] = ( 687 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 688 ) 689 else: 690 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 691 692 kwargs["exp_class"] = exp_class 693 if params: 694 kwargs["params"] = params 695 696 func = self.expression(**kwargs) 697 698 if isinstance(expr, exp.Window): 699 # The window's func was parsed as Anonymous in base parser, fix its 700 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 701 expr.set("this", func) 702 elif params: 703 # Params have blocked super()._parse_function() from parsing the following window 704 # (if that exists) as they're standing between the function call and the window spec 705 expr = self._parse_window(func) 706 else: 707 expr = func 708 709 return expr 710 711 def _parse_func_params( 712 self, this: t.Optional[exp.Func] = None 713 ) -> t.Optional[t.List[exp.Expression]]: 714 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 715 return self._parse_csv(self._parse_lambda) 716 717 if self._match(TokenType.L_PAREN): 718 params = self._parse_csv(self._parse_lambda) 719 self._match_r_paren(this) 720 return params 721 722 return None 723 724 def _parse_quantile(self) -> exp.Quantile: 725 this = self._parse_lambda() 726 params = self._parse_func_params() 727 if params: 728 return self.expression(exp.Quantile, this=params[0], quantile=this) 729 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 730 731 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 732 return super()._parse_wrapped_id_vars(optional=True) 733 734 def _parse_primary_key( 735 self, wrapped_optional: bool = False, in_props: bool = False 736 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 737 return super()._parse_primary_key( 738 wrapped_optional=wrapped_optional or in_props, in_props=in_props 739 ) 740 741 def _parse_on_property(self) -> t.Optional[exp.Expression]: 742 index = self._index 743 if self._match_text_seq("CLUSTER"): 744 this = self._parse_id_var() 745 if this: 746 return self.expression(exp.OnCluster, this=this) 747 else: 748 self._retreat(index) 749 return None 750 751 def _parse_index_constraint( 752 self, kind: t.Optional[str] = None 753 ) -> exp.IndexColumnConstraint: 754 # INDEX name1 expr TYPE type1(args) GRANULARITY value 755 this = self._parse_id_var() 756 expression = self._parse_assignment() 757 758 
index_type = self._match_text_seq("TYPE") and ( 759 self._parse_function() or self._parse_var() 760 ) 761 762 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 763 764 return self.expression( 765 exp.IndexColumnConstraint, 766 this=this, 767 expression=expression, 768 index_type=index_type, 769 granularity=granularity, 770 ) 771 772 def _parse_partition(self) -> t.Optional[exp.Partition]: 773 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 774 if not self._match(TokenType.PARTITION): 775 return None 776 777 if self._match_text_seq("ID"): 778 # Corresponds to the PARTITION ID <string_value> syntax 779 expressions: t.List[exp.Expression] = [ 780 self.expression(exp.PartitionId, this=self._parse_string()) 781 ] 782 else: 783 expressions = self._parse_expressions() 784 785 return self.expression(exp.Partition, expressions=expressions) 786 787 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 788 partition = self._parse_partition() 789 790 if not partition or not self._match(TokenType.FROM): 791 return None 792 793 return self.expression( 794 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 795 ) 796 797 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 798 if not self._match_text_seq("PROJECTION"): 799 return None 800 801 return self.expression( 802 exp.ProjectionDef, 803 this=self._parse_id_var(), 804 expression=self._parse_wrapped(self._parse_statement), 805 ) 806 807 def _parse_constraint(self) -> t.Optional[exp.Expression]: 808 return super()._parse_constraint() or self._parse_projection_def() 809 810 def _parse_alias( 811 self, this: t.Optional[exp.Expression], explicit: bool = False 812 ) -> t.Optional[exp.Expression]: 813 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 814 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 815 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 816 return this 817 818 return super()._parse_alias(this=this, explicit=explicit) 819 820 def _parse_expression(self) -> t.Optional[exp.Expression]: 821 this = super()._parse_expression() 822 823 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 824 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 825 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 826 self._match(TokenType.R_PAREN) 827 828 return this 829 830 def _parse_columns(self) -> exp.Expression: 831 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 832 833 while self._next and self._match_text_seq(")", "APPLY", "("): 834 self._match(TokenType.R_PAREN) 835 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 836 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
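A minimal usage sketch tying these constructor arguments to the ClickHouse-specific parsing shown in the class body (the query-parameter syntax is handled by _parse_query_parameter above; the round-trip output is an expectation, not a captured result):

import sqlglot

# {abc: UInt32} and {tbl: Identifier} are ClickHouse query parameters,
# parsed by _parse_query_parameter and re-emitted by placeholder_sql.
ast = sqlglot.parse_one("SELECT {abc: UInt32} FROM {tbl: Identifier}", read="clickhouse")
print(ast.sql("clickhouse"))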
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
838 class Generator(generator.Generator): 839 QUERY_HINTS = False 840 STRUCT_DELIMITER = ("(", ")") 841 NVL2_SUPPORTED = False 842 TABLESAMPLE_REQUIRES_PARENS = False 843 TABLESAMPLE_SIZE_IS_ROWS = False 844 TABLESAMPLE_KEYWORDS = "SAMPLE" 845 LAST_DAY_SUPPORTS_DATE_PART = False 846 CAN_IMPLEMENT_ARRAY_ANY = True 847 SUPPORTS_TO_NUMBER = False 848 JOIN_HINTS = False 849 TABLE_HINTS = False 850 GROUPINGS_SEP = "" 851 SET_OP_MODIFIERS = False 852 SUPPORTS_TABLE_ALIAS_COLUMNS = False 853 VALUES_AS_TABLE = False 854 855 STRING_TYPE_MAPPING = { 856 exp.DataType.Type.CHAR: "String", 857 exp.DataType.Type.LONGBLOB: "String", 858 exp.DataType.Type.LONGTEXT: "String", 859 exp.DataType.Type.MEDIUMBLOB: "String", 860 exp.DataType.Type.MEDIUMTEXT: "String", 861 exp.DataType.Type.TINYBLOB: "String", 862 exp.DataType.Type.TINYTEXT: "String", 863 exp.DataType.Type.TEXT: "String", 864 exp.DataType.Type.VARBINARY: "String", 865 exp.DataType.Type.VARCHAR: "String", 866 } 867 868 SUPPORTED_JSON_PATH_PARTS = { 869 exp.JSONPathKey, 870 exp.JSONPathRoot, 871 exp.JSONPathSubscript, 872 } 873 874 TYPE_MAPPING = { 875 **generator.Generator.TYPE_MAPPING, 876 **STRING_TYPE_MAPPING, 877 exp.DataType.Type.ARRAY: "Array", 878 exp.DataType.Type.BOOLEAN: "Bool", 879 exp.DataType.Type.BIGINT: "Int64", 880 exp.DataType.Type.DATE32: "Date32", 881 exp.DataType.Type.DATETIME: "DateTime", 882 exp.DataType.Type.DATETIME64: "DateTime64", 883 exp.DataType.Type.DECIMAL: "Decimal", 884 exp.DataType.Type.DECIMAL32: "Decimal32", 885 exp.DataType.Type.DECIMAL64: "Decimal64", 886 exp.DataType.Type.DECIMAL128: "Decimal128", 887 exp.DataType.Type.DECIMAL256: "Decimal256", 888 exp.DataType.Type.TIMESTAMP: "DateTime", 889 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 890 exp.DataType.Type.DOUBLE: "Float64", 891 exp.DataType.Type.ENUM: "Enum", 892 exp.DataType.Type.ENUM8: "Enum8", 893 exp.DataType.Type.ENUM16: "Enum16", 894 exp.DataType.Type.FIXEDSTRING: "FixedString", 895 exp.DataType.Type.FLOAT: "Float32", 896 exp.DataType.Type.INT: "Int32", 897 exp.DataType.Type.MEDIUMINT: "Int32", 898 exp.DataType.Type.INT128: "Int128", 899 exp.DataType.Type.INT256: "Int256", 900 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 901 exp.DataType.Type.MAP: "Map", 902 exp.DataType.Type.NESTED: "Nested", 903 exp.DataType.Type.SMALLINT: "Int16", 904 exp.DataType.Type.STRUCT: "Tuple", 905 exp.DataType.Type.TINYINT: "Int8", 906 exp.DataType.Type.UBIGINT: "UInt64", 907 exp.DataType.Type.UINT: "UInt32", 908 exp.DataType.Type.UINT128: "UInt128", 909 exp.DataType.Type.UINT256: "UInt256", 910 exp.DataType.Type.USMALLINT: "UInt16", 911 exp.DataType.Type.UTINYINT: "UInt8", 912 exp.DataType.Type.IPV4: "IPv4", 913 exp.DataType.Type.IPV6: "IPv6", 914 exp.DataType.Type.POINT: "Point", 915 exp.DataType.Type.RING: "Ring", 916 exp.DataType.Type.LINESTRING: "LineString", 917 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 918 exp.DataType.Type.POLYGON: "Polygon", 919 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 920 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 921 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 922 } 923 924 TRANSFORMS = { 925 **generator.Generator.TRANSFORMS, 926 exp.AnyValue: rename_func("any"), 927 exp.ApproxDistinct: rename_func("uniq"), 928 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 929 exp.ArraySize: rename_func("LENGTH"), 930 exp.ArraySum: rename_func("arraySum"), 931 exp.ArgMax: arg_max_or_min_no_count("argMax"), 932 exp.ArgMin: arg_max_or_min_no_count("argMin"), 933 
exp.Array: inline_array_sql, 934 exp.CastToStrType: rename_func("CAST"), 935 exp.CountIf: rename_func("countIf"), 936 exp.CompressColumnConstraint: lambda self, 937 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 938 exp.ComputedColumnConstraint: lambda self, 939 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 940 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 941 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 942 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 943 exp.DateStrToDate: rename_func("toDate"), 944 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 945 exp.Explode: rename_func("arrayJoin"), 946 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 947 exp.IsNan: rename_func("isNaN"), 948 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 949 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 950 exp.JSONPathKey: json_path_key_only_name, 951 exp.JSONPathRoot: lambda *_: "", 952 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 953 exp.Nullif: rename_func("nullIf"), 954 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 955 exp.Pivot: no_pivot_sql, 956 exp.Quantile: _quantile_sql, 957 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 958 exp.Rand: rename_func("randCanonical"), 959 exp.StartsWith: rename_func("startsWith"), 960 exp.StrPosition: lambda self, e: self.func( 961 "position", e.this, e.args.get("substr"), e.args.get("position") 962 ), 963 exp.TimeToStr: lambda self, e: self.func( 964 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 965 ), 966 exp.TimeStrToTime: _timestrtotime_sql, 967 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 968 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 969 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 970 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 971 exp.MD5Digest: rename_func("MD5"), 972 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 973 exp.SHA: rename_func("SHA1"), 974 exp.SHA2: sha256_sql, 975 exp.UnixToTime: _unix_to_time_sql, 976 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 977 exp.Trim: trim_sql, 978 exp.Variance: rename_func("varSamp"), 979 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 980 exp.Stddev: rename_func("stddevSamp"), 981 exp.Chr: rename_func("CHAR"), 982 exp.Lag: lambda self, e: self.func( 983 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 984 ), 985 exp.Lead: lambda self, e: self.func( 986 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 987 ), 988 } 989 990 PROPERTIES_LOCATION = { 991 **generator.Generator.PROPERTIES_LOCATION, 992 exp.OnCluster: exp.Properties.Location.POST_NAME, 993 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 994 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 995 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 996 } 997 998 # There's no list in docs, but it can be found in Clickhouse code 999 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1000 ON_CLUSTER_TARGETS = { 1001 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1002 "DATABASE", 1003 "TABLE", 1004 "VIEW", 1005 "DICTIONARY", 1006 "INDEX", 1007 "FUNCTION", 1008 "NAMED COLLECTION", 1009 } 1010 1011 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1012 NON_NULLABLE_TYPES = { 1013 
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (eg. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return f"CAST({strtodate_sql} AS DATE)"

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{this_properties}{this_schema}"

            return super().createable_sql(expression, locations)

        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

        def is_sql(self, expression: exp.Is) -> str:
            is_sql = super().is_sql(expression)

            if isinstance(expression.parent, exp.Not):
                # value IS NOT NULL -> NOT (value IS NULL)
                is_sql = self.wrap(is_sql)

            return is_sql
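The effect of overrides like _any_to_has and is_sql is easiest to observe through the top-level sqlglot.transpile API. A minimal sketch follows; the printed outputs are illustrative of the rewrites above, not verbatim guarantees:

    import sqlglot

    # eq_sql/_any_to_has: `x = ANY(arr)` (e.g. coming from postgres) is
    # rewritten to ClickHouse's has() function.
    print(sqlglot.transpile("SELECT x = ANY(arr)", read="postgres", write="clickhouse")[0])
    # e.g. SELECT has(arr, x)

    # is_sql: a negated IS NULL is wrapped so the output reads NOT (x IS NULL).
    print(sqlglot.transpile("SELECT x IS NOT NULL", read="postgres", write="clickhouse")[0])
    # e.g. SELECT NOT (x IS NULL)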
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
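These options are normally supplied through the top-level API rather than by constructing a Generator by hand. A minimal sketch, assuming the keyword arguments are forwarded to the generator (sqlglot.transpile passes extra options through):

    import sqlglot

    # Pretty-print and force-quote identifiers when generating ClickHouse SQL.
    sql = sqlglot.transpile(
        "select col_a, sum(col_b) from tbl group by col_a",
        write="clickhouse",
        pretty=True,    # format the output across multiple lines
        identify=True,  # always quote identifiers
    )[0]
    print(sql)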
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql