sqlglot.dialects.clickhouse
1from __future__ import annotations 2import typing as t 3import datetime 4from sqlglot import exp, generator, parser, tokens 5from sqlglot.dialects.dialect import ( 6 Dialect, 7 NormalizationStrategy, 8 arg_max_or_min_no_count, 9 build_date_delta, 10 build_formatted_time, 11 inline_array_sql, 12 json_extract_segments, 13 json_path_key_only_name, 14 no_pivot_sql, 15 build_json_extract_path, 16 rename_func, 17 sha256_sql, 18 var_map_sql, 19 timestamptrunc_sql, 20 unit_to_var, 21 trim_sql, 22) 23from sqlglot.generator import Generator 24from sqlglot.helper import is_int, seq_get 25from sqlglot.tokens import Token, TokenType 26from sqlglot.generator import unsupported_args 27 28DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd] 29 30 31def _build_date_format(args: t.List) -> exp.TimeToStr: 32 expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args) 33 34 timezone = seq_get(args, 2) 35 if timezone: 36 expr.set("zone", timezone) 37 38 return expr 39 40 41def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str: 42 scale = expression.args.get("scale") 43 timestamp = expression.this 44 45 if scale in (None, exp.UnixToTime.SECONDS): 46 return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 47 if scale == exp.UnixToTime.MILLIS: 48 return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 49 if scale == exp.UnixToTime.MICROS: 50 return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 51 if scale == exp.UnixToTime.NANOS: 52 return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 53 54 return self.func( 55 "fromUnixTimestamp", 56 exp.cast( 57 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 58 ), 59 ) 60 61 62def _lower_func(sql: str) -> str: 63 index = sql.index("(") 64 return sql[:index].lower() + sql[index:] 65 66 67def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str: 68 quantile = expression.args["quantile"] 69 args = f"({self.sql(expression, 'this')})" 70 71 if isinstance(quantile, exp.Array): 72 func = self.func("quantiles", *quantile) 73 else: 74 func = self.func("quantile", quantile) 75 76 return func + args 77 78 79def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc: 80 if len(args) == 1: 81 return exp.CountIf(this=seq_get(args, 0)) 82 83 return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If")) 84 85 86def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous: 87 if len(args) == 3: 88 return exp.Anonymous(this="STR_TO_DATE", expressions=args) 89 90 strtodate = exp.StrToDate.from_arg_list(args) 91 return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME)) 92 93 94def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]: 95 def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str: 96 if not expression.unit: 97 return rename_func(name)(self, expression) 98 99 return self.func( 100 name, 101 unit_to_var(expression), 102 expression.expression, 103 expression.this, 104 ) 105 106 return _delta_sql 107 108 109def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime): 110 ts = expression.this 111 112 tz = expression.args.get("zone") 113 if tz and isinstance(ts, exp.Literal): 114 # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them. 115 # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`. 116 # 117 # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit) 118 # or microsecond (6 digit) precision. It will error if passed any other number of fractional 119 # digits, so we extract the fractional seconds and pad to 6 digits before parsing. 120 ts_string = ts.name.strip() 121 122 # separate [date and time] from [fractional seconds and UTC offset] 123 ts_parts = ts_string.split(".") 124 if len(ts_parts) == 2: 125 # separate fractional seconds and UTC offset 126 offset_sep = "+" if "+" in ts_parts[1] else "-" 127 ts_frac_parts = ts_parts[1].split(offset_sep) 128 num_frac_parts = len(ts_frac_parts) 129 130 # pad to 6 digits if fractional seconds present 131 ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0") 132 ts_string = "".join( 133 [ 134 ts_parts[0], # date and time 135 ".", 136 ts_frac_parts[0], # fractional seconds 137 offset_sep if num_frac_parts > 1 else "", 138 ts_frac_parts[1] if num_frac_parts > 1 else "", # utc offset (if present) 139 ] 140 ) 141 142 # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14' 143 # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if 144 # it's part of the timestamp string 145 ts_without_tz = ( 146 datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ") 147 ) 148 ts = exp.Literal.string(ts_without_tz) 149 150 # Non-nullable DateTime64 with microsecond precision 151 expressions = [exp.DataTypeParam(this=tz)] if tz else [] 152 datatype = exp.DataType.build( 153 exp.DataType.Type.DATETIME64, 154 expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions], 155 nullable=False, 156 ) 157 158 return self.sql(exp.cast(ts, datatype, dialect=self.dialect)) 159 160 161class ClickHouse(Dialect): 162 NORMALIZE_FUNCTIONS: bool | str = False 163 NULL_ORDERING = "nulls_are_last" 164 SUPPORTS_USER_DEFINED_TYPES = False 165 SAFE_DIVISION = True 166 LOG_BASE_FIRST: t.Optional[bool] = None 167 FORCE_EARLY_ALIAS_REF_EXPANSION = True 168 PRESERVE_ORIGINAL_NAMES = True 169 170 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 171 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 172 173 UNESCAPED_SEQUENCES = { 174 "\\0": "\0", 175 } 176 177 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 178 179 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 180 exp.Except: False, 181 exp.Intersect: False, 182 exp.Union: None, 183 } 184 185 class Tokenizer(tokens.Tokenizer): 186 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 187 IDENTIFIERS = ['"', "`"] 188 IDENTIFIER_ESCAPES = ["\\"] 189 STRING_ESCAPES = ["'", "\\"] 190 BIT_STRINGS = [("0b", "")] 191 HEX_STRINGS = [("0x", ""), ("0X", "")] 192 HEREDOC_STRINGS = ["$"] 193 194 KEYWORDS = { 195 **tokens.Tokenizer.KEYWORDS, 196 "ATTACH": TokenType.COMMAND, 197 "DATE32": TokenType.DATE32, 198 "DATETIME64": TokenType.DATETIME64, 199 "DICTIONARY": TokenType.DICTIONARY, 200 "ENUM8": TokenType.ENUM8, 201 "ENUM16": TokenType.ENUM16, 202 "FINAL": TokenType.FINAL, 203 "FIXEDSTRING": TokenType.FIXEDSTRING, 204 "FLOAT32": TokenType.FLOAT, 205 "FLOAT64": TokenType.DOUBLE, 206 "GLOBAL": TokenType.GLOBAL, 207 "INT256": TokenType.INT256, 208 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 209 "MAP": TokenType.MAP, 210 "NESTED": TokenType.NESTED, 211 "SAMPLE": TokenType.TABLE_SAMPLE, 212 "TUPLE": TokenType.STRUCT, 213 "UINT128": TokenType.UINT128, 214 "UINT16": TokenType.USMALLINT, 215 "UINT256": TokenType.UINT256, 216 "UINT32": TokenType.UINT, 217 "UINT64": TokenType.UBIGINT, 218 "UINT8": TokenType.UTINYINT, 219 "IPV4": TokenType.IPV4, 220 "IPV6": TokenType.IPV6, 221 "POINT": TokenType.POINT, 222 "RING": TokenType.RING, 223 "LINESTRING": TokenType.LINESTRING, 224 "MULTILINESTRING": TokenType.MULTILINESTRING, 225 "POLYGON": TokenType.POLYGON, 226 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 227 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 228 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 229 "SYSTEM": TokenType.COMMAND, 230 "PREWHERE": TokenType.PREWHERE, 231 } 232 KEYWORDS.pop("/*+") 233 234 SINGLE_TOKENS = { 235 **tokens.Tokenizer.SINGLE_TOKENS, 236 "$": TokenType.HEREDOC_STRING, 237 } 238 239 class Parser(parser.Parser): 240 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 241 # * select x from t1 union all select x from t2 limit 1; 242 # * select x from t1 union all (select x from t2 limit 1); 243 MODIFIERS_ATTACHED_TO_SET_OP = False 244 INTERVAL_SPANS = False 245 246 FUNCTIONS = { 247 **parser.Parser.FUNCTIONS, 248 "ANY": exp.AnyValue.from_arg_list, 249 "ARRAYSUM": exp.ArraySum.from_arg_list, 250 "COUNTIF": _build_count_if, 251 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 252 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 253 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 254 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 255 "DATE_FORMAT": _build_date_format, 256 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 257 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 258 "FORMATDATETIME": _build_date_format, 259 "JSONEXTRACTSTRING": build_json_extract_path( 260 exp.JSONExtractScalar, zero_based_indexing=False 261 ), 262 "MAP": parser.build_var_map, 263 "MATCH": exp.RegexpLike.from_arg_list, 264 "RANDCANONICAL": exp.Rand.from_arg_list, 265 "STR_TO_DATE": _build_str_to_date, 266 "TUPLE": exp.Struct.from_arg_list, 267 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 268 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 269 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 270 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 271 "UNIQ": exp.ApproxDistinct.from_arg_list, 272 "XOR": lambda args: exp.Xor(expressions=args), 273 "MD5": exp.MD5Digest.from_arg_list, 274 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 275 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 276 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 277 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 278 } 279 FUNCTIONS.pop("TRANSFORM") 280 281 AGG_FUNCTIONS = { 282 "count", 283 "min", 284 "max", 285 "sum", 286 "avg", 287 "any", 288 "stddevPop", 289 "stddevSamp", 290 "varPop", 291 "varSamp", 292 "corr", 293 "covarPop", 294 "covarSamp", 295 "entropy", 296 "exponentialMovingAverage", 297 "intervalLengthSum", 298 "kolmogorovSmirnovTest", 299 "mannWhitneyUTest", 300 "median", 301 "rankCorr", 302 "sumKahan", 303 "studentTTest", 304 "welchTTest", 305 "anyHeavy", 306 "anyLast", 307 "boundingRatio", 308 "first_value", 309 "last_value", 310 "argMin", 311 "argMax", 312 "avgWeighted", 313 "topK", 314 "topKWeighted", 315 "deltaSum", 316 "deltaSumTimestamp", 317 "groupArray", 318 "groupArrayLast", 319 "groupUniqArray", 320 "groupArrayInsertAt", 321 "groupArrayMovingAvg", 322 "groupArrayMovingSum", 323 "groupArraySample", 324 "groupBitAnd", 325 "groupBitOr", 326 "groupBitXor", 327 "groupBitmap", 328 "groupBitmapAnd", 329 "groupBitmapOr", 330 "groupBitmapXor", 331 "sumWithOverflow", 332 "sumMap", 333 "minMap", 334 "maxMap", 335 "skewSamp", 336 "skewPop", 337 "kurtSamp", 338 "kurtPop", 339 "uniq", 340 "uniqExact", 341 "uniqCombined", 342 "uniqCombined64", 343 "uniqHLL12", 344 "uniqTheta", 345 "quantile", 346 "quantiles", 347 "quantileExact", 348 "quantilesExact", 349 "quantileExactLow", 350 "quantilesExactLow", 351 "quantileExactHigh", 352 "quantilesExactHigh", 353 "quantileExactWeighted", 354 "quantilesExactWeighted", 355 "quantileTiming", 356 "quantilesTiming", 357 "quantileTimingWeighted", 358 "quantilesTimingWeighted", 359 "quantileDeterministic", 360 "quantilesDeterministic", 361 "quantileTDigest", 362 "quantilesTDigest", 363 "quantileTDigestWeighted", 364 "quantilesTDigestWeighted", 365 "quantileBFloat16", 366 "quantilesBFloat16", 367 "quantileBFloat16Weighted", 368 "quantilesBFloat16Weighted", 369 "simpleLinearRegression", 370 "stochasticLinearRegression", 371 "stochasticLogisticRegression", 372 "categoricalInformationValue", 373 "contingency", 374 "cramersV", 375 "cramersVBiasCorrected", 376 "theilsU", 377 "maxIntersections", 378 "maxIntersectionsPosition", 379 "meanZTest", 380 "quantileInterpolatedWeighted", 381 "quantilesInterpolatedWeighted", 382 "quantileGK", 383 "quantilesGK", 384 "sparkBar", 385 "sumCount", 386 "largestTriangleThreeBuckets", 387 "histogram", 388 "sequenceMatch", 389 "sequenceCount", 390 "windowFunnel", 391 "retention", 392 "uniqUpTo", 393 "sequenceNextNode", 394 "exponentialTimeDecayedAvg", 395 } 396 397 AGG_FUNCTIONS_SUFFIXES = [ 398 "If", 399 "Array", 400 "ArrayIf", 401 "Map", 402 "SimpleState", 403 "State", 404 "Merge", 405 "MergeState", 406 "ForEach", 407 "Distinct", 408 "OrDefault", 409 "OrNull", 410 "Resample", 411 "ArgMin", 412 "ArgMax", 413 ] 414 415 FUNC_TOKENS = { 416 *parser.Parser.FUNC_TOKENS, 417 TokenType.SET, 418 } 419 420 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 421 422 ID_VAR_TOKENS = { 423 *parser.Parser.ID_VAR_TOKENS, 424 TokenType.LIKE, 425 } 426 427 AGG_FUNC_MAPPING = ( 428 lambda functions, suffixes: { 429 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 430 } 431 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 432 433 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 434 435 FUNCTION_PARSERS = { 436 **parser.Parser.FUNCTION_PARSERS, 437 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 438 "QUANTILE": lambda self: self._parse_quantile(), 439 "MEDIAN": lambda self: self._parse_quantile(), 440 "COLUMNS": lambda self: self._parse_columns(), 441 } 442 443 FUNCTION_PARSERS.pop("MATCH") 444 445 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 446 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 447 448 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 449 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 450 451 RANGE_PARSERS = { 452 **parser.Parser.RANGE_PARSERS, 453 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 454 and self._parse_in(this, is_global=True), 455 } 456 457 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 458 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 459 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 460 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 461 462 JOIN_KINDS = { 463 *parser.Parser.JOIN_KINDS, 464 TokenType.ANY, 465 TokenType.ASOF, 466 TokenType.ARRAY, 467 } 468 469 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 470 TokenType.ANY, 471 TokenType.ARRAY, 472 TokenType.FINAL, 473 TokenType.FORMAT, 474 TokenType.SETTINGS, 475 } 476 477 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 478 TokenType.FORMAT, 479 } 480 481 LOG_DEFAULTS_TO_LN = True 482 483 QUERY_MODIFIER_PARSERS = { 484 **parser.Parser.QUERY_MODIFIER_PARSERS, 485 TokenType.SETTINGS: lambda self: ( 486 "settings", 487 self._advance() or self._parse_csv(self._parse_assignment), 488 ), 489 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 490 } 491 492 CONSTRAINT_PARSERS = { 493 **parser.Parser.CONSTRAINT_PARSERS, 494 "INDEX": lambda self: self._parse_index_constraint(), 495 "CODEC": lambda self: self._parse_compress(), 496 } 497 498 ALTER_PARSERS = { 499 **parser.Parser.ALTER_PARSERS, 500 "REPLACE": lambda self: self._parse_alter_table_replace(), 501 } 502 503 SCHEMA_UNNAMED_CONSTRAINTS = { 504 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 505 "INDEX", 506 } 507 508 PLACEHOLDER_PARSERS = { 509 **parser.Parser.PLACEHOLDER_PARSERS, 510 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 511 } 512 513 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 514 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 515 return self._parse_lambda() 516 517 def _parse_types( 518 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 519 ) -> t.Optional[exp.Expression]: 520 dtype = super()._parse_types( 521 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 522 ) 523 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 524 # Mark every type as non-nullable which is ClickHouse's default, unless it's 525 # already marked as nullable. This marker helps us transpile types from other 526 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 527 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 528 # fail in ClickHouse without the `Nullable` type constructor. 529 dtype.set("nullable", False) 530 531 return dtype 532 533 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 534 index = self._index 535 this = self._parse_bitwise() 536 if self._match(TokenType.FROM): 537 self._retreat(index) 538 return super()._parse_extract() 539 540 # We return Anonymous here because extract and regexpExtract have different semantics, 541 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 542 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 543 # 544 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 545 self._match(TokenType.COMMA) 546 return self.expression( 547 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 548 ) 549 550 def _parse_assignment(self) -> t.Optional[exp.Expression]: 551 this = super()._parse_assignment() 552 553 if self._match(TokenType.PLACEHOLDER): 554 return self.expression( 555 exp.If, 556 this=this, 557 true=self._parse_assignment(), 558 false=self._match(TokenType.COLON) and self._parse_assignment(), 559 ) 560 561 return this 562 563 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 564 """ 565 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 566 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 567 """ 568 this = self._parse_id_var() 569 self._match(TokenType.COLON) 570 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 571 self._match_text_seq("IDENTIFIER") and "Identifier" 572 ) 573 574 if not kind: 575 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 576 elif not self._match(TokenType.R_BRACE): 577 self.raise_error("Expecting }") 578 579 return self.expression(exp.Placeholder, this=this, kind=kind) 580 581 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 582 this = super()._parse_in(this) 583 this.set("is_global", is_global) 584 return this 585 586 def _parse_table( 587 self, 588 schema: bool = False, 589 joins: bool = False, 590 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 591 parse_bracket: bool = False, 592 is_db_reference: bool = False, 593 parse_partition: bool = False, 594 ) -> t.Optional[exp.Expression]: 595 this = super()._parse_table( 596 schema=schema, 597 joins=joins, 598 alias_tokens=alias_tokens, 599 parse_bracket=parse_bracket, 600 is_db_reference=is_db_reference, 601 ) 602 603 if self._match(TokenType.FINAL): 604 this = self.expression(exp.Final, this=this) 605 606 return this 607 608 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 609 return super()._parse_position(haystack_first=True) 610 611 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 612 def _parse_cte(self) -> exp.CTE: 613 # WITH <identifier> AS <subquery expression> 614 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 615 616 if not cte: 617 # WITH <expression> AS <identifier> 618 cte = self.expression( 619 exp.CTE, 620 this=self._parse_assignment(), 621 alias=self._parse_table_alias(), 622 scalar=True, 623 ) 624 625 return cte 626 627 def _parse_join_parts( 628 self, 629 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 630 is_global = self._match(TokenType.GLOBAL) and self._prev 631 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 632 633 if kind_pre: 634 kind = self._match_set(self.JOIN_KINDS) and self._prev 635 side = self._match_set(self.JOIN_SIDES) and self._prev 636 return is_global, side, kind 637 638 return ( 639 is_global, 640 self._match_set(self.JOIN_SIDES) and self._prev, 641 self._match_set(self.JOIN_KINDS) and self._prev, 642 ) 643 644 def _parse_join( 645 self, skip_join_token: bool = False, parse_bracket: bool = False 646 ) -> t.Optional[exp.Join]: 647 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 648 if join: 649 join.set("global", join.args.pop("method", None)) 650 651 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 652 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 653 if join.kind == "ARRAY": 654 for table in join.find_all(exp.Table): 655 table.replace(table.to_column()) 656 657 return join 658 659 def _parse_function( 660 self, 661 functions: t.Optional[t.Dict[str, t.Callable]] = None, 662 anonymous: bool = False, 663 optional_parens: bool = True, 664 any_token: bool = False, 665 ) -> t.Optional[exp.Expression]: 666 expr = super()._parse_function( 667 functions=functions, 668 anonymous=anonymous, 669 optional_parens=optional_parens, 670 any_token=any_token, 671 ) 672 673 func = expr.this if isinstance(expr, exp.Window) else expr 674 675 # Aggregate functions can be split in 2 parts: <func_name><suffix> 676 parts = ( 677 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 678 ) 679 680 if parts: 681 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 682 params = self._parse_func_params(anon_func) 683 684 kwargs = { 685 "this": anon_func.this, 686 "expressions": anon_func.expressions, 687 } 688 if parts[1]: 689 kwargs["parts"] = parts 690 exp_class: t.Type[exp.Expression] = ( 691 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 692 ) 693 else: 694 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 695 696 kwargs["exp_class"] = exp_class 697 if params: 698 kwargs["params"] = params 699 700 func = self.expression(**kwargs) 701 702 if isinstance(expr, exp.Window): 703 # The window's func was parsed as Anonymous in base parser, fix its 704 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 705 expr.set("this", func) 706 elif params: 707 # Params have blocked super()._parse_function() from parsing the following window 708 # (if that exists) as they're standing between the function call and the window spec 709 expr = self._parse_window(func) 710 else: 711 expr = func 712 713 return expr 714 715 def _parse_func_params( 716 self, this: t.Optional[exp.Func] = None 717 ) -> t.Optional[t.List[exp.Expression]]: 718 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 719 return self._parse_csv(self._parse_lambda) 720 721 if self._match(TokenType.L_PAREN): 722 params = self._parse_csv(self._parse_lambda) 723 self._match_r_paren(this) 724 return params 725 726 return None 727 728 def _parse_quantile(self) -> exp.Quantile: 729 this = self._parse_lambda() 730 params = self._parse_func_params() 731 if params: 732 return self.expression(exp.Quantile, this=params[0], quantile=this) 733 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 734 735 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 736 return super()._parse_wrapped_id_vars(optional=True) 737 738 def _parse_primary_key( 739 self, wrapped_optional: bool = False, in_props: bool = False 740 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 741 return super()._parse_primary_key( 742 wrapped_optional=wrapped_optional or in_props, in_props=in_props 743 ) 744 745 def _parse_on_property(self) -> t.Optional[exp.Expression]: 746 index = self._index 747 if self._match_text_seq("CLUSTER"): 748 this = self._parse_id_var() 749 if this: 750 return self.expression(exp.OnCluster, this=this) 751 else: 752 self._retreat(index) 753 return None 754 755 def _parse_index_constraint( 756 self, kind: t.Optional[str] = None 757 ) -> exp.IndexColumnConstraint: 758 # INDEX name1 expr TYPE type1(args) GRANULARITY value 759 this = self._parse_id_var() 760 expression = self._parse_assignment() 761 762 index_type = self._match_text_seq("TYPE") and ( 763 self._parse_function() or self._parse_var() 764 ) 765 766 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 767 768 return self.expression( 769 exp.IndexColumnConstraint, 770 this=this, 771 expression=expression, 772 index_type=index_type, 773 granularity=granularity, 774 ) 775 776 def _parse_partition(self) -> t.Optional[exp.Partition]: 777 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 778 if not self._match(TokenType.PARTITION): 779 return None 780 781 if self._match_text_seq("ID"): 782 # Corresponds to the PARTITION ID <string_value> syntax 783 expressions: t.List[exp.Expression] = [ 784 self.expression(exp.PartitionId, this=self._parse_string()) 785 ] 786 else: 787 expressions = self._parse_expressions() 788 789 return self.expression(exp.Partition, expressions=expressions) 790 791 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 792 partition = self._parse_partition() 793 794 if not partition or not self._match(TokenType.FROM): 795 return None 796 797 return self.expression( 798 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 799 ) 800 801 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 802 if not self._match_text_seq("PROJECTION"): 803 return None 804 805 return self.expression( 806 exp.ProjectionDef, 807 this=self._parse_id_var(), 808 expression=self._parse_wrapped(self._parse_statement), 809 ) 810 811 def _parse_constraint(self) -> t.Optional[exp.Expression]: 812 return super()._parse_constraint() or self._parse_projection_def() 813 814 def _parse_alias( 815 self, this: t.Optional[exp.Expression], explicit: bool = False 816 ) -> t.Optional[exp.Expression]: 817 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 818 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 819 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 820 return this 821 822 return super()._parse_alias(this=this, explicit=explicit) 823 824 def _parse_expression(self) -> t.Optional[exp.Expression]: 825 this = super()._parse_expression() 826 827 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 828 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 829 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 830 self._match(TokenType.R_PAREN) 831 832 return this 833 834 def _parse_columns(self) -> exp.Expression: 835 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 836 837 while self._next and self._match_text_seq(")", "APPLY", "("): 838 self._match(TokenType.R_PAREN) 839 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 840 return this 841 842 class Generator(generator.Generator): 843 QUERY_HINTS = False 844 STRUCT_DELIMITER = ("(", ")") 845 NVL2_SUPPORTED = False 846 TABLESAMPLE_REQUIRES_PARENS = False 847 TABLESAMPLE_SIZE_IS_ROWS = False 848 TABLESAMPLE_KEYWORDS = "SAMPLE" 849 LAST_DAY_SUPPORTS_DATE_PART = False 850 CAN_IMPLEMENT_ARRAY_ANY = True 851 SUPPORTS_TO_NUMBER = False 852 JOIN_HINTS = False 853 TABLE_HINTS = False 854 GROUPINGS_SEP = "" 855 SET_OP_MODIFIERS = False 856 SUPPORTS_TABLE_ALIAS_COLUMNS = False 857 VALUES_AS_TABLE = False 858 ARRAY_SIZE_NAME = "LENGTH" 859 860 STRING_TYPE_MAPPING = { 861 exp.DataType.Type.CHAR: "String", 862 exp.DataType.Type.LONGBLOB: "String", 863 exp.DataType.Type.LONGTEXT: "String", 864 exp.DataType.Type.MEDIUMBLOB: "String", 865 exp.DataType.Type.MEDIUMTEXT: "String", 866 exp.DataType.Type.TINYBLOB: "String", 867 exp.DataType.Type.TINYTEXT: "String", 868 exp.DataType.Type.TEXT: "String", 869 exp.DataType.Type.VARBINARY: "String", 870 exp.DataType.Type.VARCHAR: "String", 871 } 872 873 SUPPORTED_JSON_PATH_PARTS = { 874 exp.JSONPathKey, 875 exp.JSONPathRoot, 876 exp.JSONPathSubscript, 877 } 878 879 TYPE_MAPPING = { 880 **generator.Generator.TYPE_MAPPING, 881 **STRING_TYPE_MAPPING, 882 exp.DataType.Type.ARRAY: "Array", 883 exp.DataType.Type.BOOLEAN: "Bool", 884 exp.DataType.Type.BIGINT: "Int64", 885 exp.DataType.Type.DATE32: "Date32", 886 exp.DataType.Type.DATETIME: "DateTime", 887 exp.DataType.Type.DATETIME2: "DateTime", 888 exp.DataType.Type.SMALLDATETIME: "DateTime", 889 exp.DataType.Type.DATETIME64: "DateTime64", 890 exp.DataType.Type.DECIMAL: "Decimal", 891 exp.DataType.Type.DECIMAL32: "Decimal32", 892 exp.DataType.Type.DECIMAL64: "Decimal64", 893 exp.DataType.Type.DECIMAL128: "Decimal128", 894 exp.DataType.Type.DECIMAL256: "Decimal256", 895 exp.DataType.Type.TIMESTAMP: "DateTime", 896 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 897 exp.DataType.Type.DOUBLE: "Float64", 898 exp.DataType.Type.ENUM: "Enum", 899 exp.DataType.Type.ENUM8: "Enum8", 900 exp.DataType.Type.ENUM16: "Enum16", 901 exp.DataType.Type.FIXEDSTRING: "FixedString", 902 exp.DataType.Type.FLOAT: "Float32", 903 exp.DataType.Type.INT: "Int32", 904 exp.DataType.Type.MEDIUMINT: "Int32", 905 exp.DataType.Type.INT128: "Int128", 906 exp.DataType.Type.INT256: "Int256", 907 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 908 exp.DataType.Type.MAP: "Map", 909 exp.DataType.Type.NESTED: "Nested", 910 exp.DataType.Type.SMALLINT: "Int16", 911 exp.DataType.Type.STRUCT: "Tuple", 912 exp.DataType.Type.TINYINT: "Int8", 913 exp.DataType.Type.UBIGINT: "UInt64", 914 exp.DataType.Type.UINT: "UInt32", 915 exp.DataType.Type.UINT128: "UInt128", 916 exp.DataType.Type.UINT256: "UInt256", 917 exp.DataType.Type.USMALLINT: "UInt16", 918 exp.DataType.Type.UTINYINT: "UInt8", 919 exp.DataType.Type.IPV4: "IPv4", 920 exp.DataType.Type.IPV6: "IPv6", 921 exp.DataType.Type.POINT: "Point", 922 exp.DataType.Type.RING: "Ring", 923 exp.DataType.Type.LINESTRING: "LineString", 924 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 925 exp.DataType.Type.POLYGON: "Polygon", 926 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 927 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 928 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 929 } 930 931 TRANSFORMS = { 932 **generator.Generator.TRANSFORMS, 933 exp.AnyValue: rename_func("any"), 934 exp.ApproxDistinct: rename_func("uniq"), 935 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 936 exp.ArraySum: rename_func("arraySum"), 937 exp.ArgMax: arg_max_or_min_no_count("argMax"), 938 exp.ArgMin: arg_max_or_min_no_count("argMin"), 939 exp.Array: inline_array_sql, 940 exp.CastToStrType: rename_func("CAST"), 941 exp.CountIf: rename_func("countIf"), 942 exp.CompressColumnConstraint: lambda self, 943 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 944 exp.ComputedColumnConstraint: lambda self, 945 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 946 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 947 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 948 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 949 exp.DateStrToDate: rename_func("toDate"), 950 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 951 exp.Explode: rename_func("arrayJoin"), 952 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 953 exp.IsNan: rename_func("isNaN"), 954 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 955 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 956 exp.JSONPathKey: json_path_key_only_name, 957 exp.JSONPathRoot: lambda *_: "", 958 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 959 exp.Median: rename_func("median"), 960 exp.Nullif: rename_func("nullIf"), 961 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 962 exp.Pivot: no_pivot_sql, 963 exp.Quantile: _quantile_sql, 964 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 965 exp.Rand: rename_func("randCanonical"), 966 exp.StartsWith: rename_func("startsWith"), 967 exp.StrPosition: lambda self, e: self.func( 968 "position", e.this, e.args.get("substr"), e.args.get("position") 969 ), 970 exp.TimeToStr: lambda self, e: self.func( 971 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 972 ), 973 exp.TimeStrToTime: _timestrtotime_sql, 974 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 975 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 976 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 977 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 978 exp.MD5Digest: rename_func("MD5"), 979 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 980 exp.SHA: rename_func("SHA1"), 981 exp.SHA2: sha256_sql, 982 exp.UnixToTime: _unix_to_time_sql, 983 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 984 exp.Trim: trim_sql, 985 exp.Variance: rename_func("varSamp"), 986 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 987 exp.Stddev: rename_func("stddevSamp"), 988 exp.Chr: rename_func("CHAR"), 989 exp.Lag: lambda self, e: self.func( 990 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 991 ), 992 exp.Lead: lambda self, e: self.func( 993 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 994 ), 995 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 996 rename_func("editDistance") 997 ), 998 } 999 1000 PROPERTIES_LOCATION = { 1001 **generator.Generator.PROPERTIES_LOCATION, 1002 exp.OnCluster: exp.Properties.Location.POST_NAME, 1003 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1004 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1005 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1006 } 1007 1008 # There's no list in docs, but it can be found in Clickhouse code 1009 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1010 ON_CLUSTER_TARGETS = { 1011 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1012 "DATABASE", 1013 "TABLE", 1014 "VIEW", 1015 "DICTIONARY", 1016 "INDEX", 1017 "FUNCTION", 1018 "NAMED COLLECTION", 1019 } 1020 1021 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1022 NON_NULLABLE_TYPES = { 1023 exp.DataType.Type.ARRAY, 1024 exp.DataType.Type.MAP, 1025 exp.DataType.Type.STRUCT, 1026 exp.DataType.Type.POINT, 1027 exp.DataType.Type.RING, 1028 exp.DataType.Type.LINESTRING, 1029 exp.DataType.Type.MULTILINESTRING, 1030 exp.DataType.Type.POLYGON, 1031 exp.DataType.Type.MULTIPOLYGON, 1032 } 1033 1034 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1035 strtodate_sql = self.function_fallback_sql(expression) 1036 1037 if not isinstance(expression.parent, exp.Cast): 1038 # StrToDate returns DATEs in other dialects (eg. postgres), so 1039 # this branch aims to improve the transpilation to clickhouse 1040 return f"CAST({strtodate_sql} AS DATE)" 1041 1042 return strtodate_sql 1043 1044 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1045 this = expression.this 1046 1047 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1048 return self.sql(this) 1049 1050 return super().cast_sql(expression, safe_prefix=safe_prefix) 1051 1052 def trycast_sql(self, expression: exp.TryCast) -> str: 1053 dtype = expression.to 1054 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1055 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1056 dtype.set("nullable", True) 1057 1058 return super().cast_sql(expression) 1059 1060 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1061 this = self.json_path_part(expression.this) 1062 return str(int(this) + 1) if is_int(this) else this 1063 1064 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1065 return f"AS {self.sql(expression, 'this')}" 1066 1067 def _any_to_has( 1068 self, 1069 expression: exp.EQ | exp.NEQ, 1070 default: t.Callable[[t.Any], str], 1071 prefix: str = "", 1072 ) -> str: 1073 if isinstance(expression.left, exp.Any): 1074 arr = expression.left 1075 this = expression.right 1076 elif isinstance(expression.right, exp.Any): 1077 arr = expression.right 1078 this = expression.left 1079 else: 1080 return default(expression) 1081 1082 return prefix + self.func("has", arr.this.unnest(), this) 1083 1084 def eq_sql(self, expression: exp.EQ) -> str: 1085 return self._any_to_has(expression, super().eq_sql) 1086 1087 def neq_sql(self, expression: exp.NEQ) -> str: 1088 return self._any_to_has(expression, super().neq_sql, "NOT ") 1089 1090 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1091 # Manually add a flag to make the search case-insensitive 1092 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1093 return self.func("match", expression.this, regex) 1094 1095 def datatype_sql(self, expression: exp.DataType) -> str: 1096 # String is the standard ClickHouse type, every other variant is just an alias. 1097 # Additionally, any supplied length parameter will be ignored. 1098 # 1099 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1100 if expression.this in self.STRING_TYPE_MAPPING: 1101 dtype = "String" 1102 else: 1103 dtype = super().datatype_sql(expression) 1104 1105 # This section changes the type to `Nullable(...)` if the following conditions hold: 1106 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1107 # and change their semantics 1108 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1109 # constraint: "Type of Map key must be a type, that can be represented by integer or 1110 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1111 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1112 parent = expression.parent 1113 nullable = expression.args.get("nullable") 1114 if nullable is True or ( 1115 nullable is None 1116 and not ( 1117 isinstance(parent, exp.DataType) 1118 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1119 and expression.index in (None, 0) 1120 ) 1121 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1122 ): 1123 dtype = f"Nullable({dtype})" 1124 1125 return dtype 1126 1127 def cte_sql(self, expression: exp.CTE) -> str: 1128 if expression.args.get("scalar"): 1129 this = self.sql(expression, "this") 1130 alias = self.sql(expression, "alias") 1131 return f"{this} AS {alias}" 1132 1133 return super().cte_sql(expression) 1134 1135 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1136 return super().after_limit_modifiers(expression) + [ 1137 ( 1138 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1139 if expression.args.get("settings") 1140 else "" 1141 ), 1142 ( 1143 self.seg("FORMAT ") + self.sql(expression, "format") 1144 if expression.args.get("format") 1145 else "" 1146 ), 1147 ] 1148 1149 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1150 params = self.expressions(expression, key="params", flat=True) 1151 return self.func(expression.name, *expression.expressions) + f"({params})" 1152 1153 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1154 return self.func(expression.name, *expression.expressions) 1155 1156 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1157 return self.anonymousaggfunc_sql(expression) 1158 1159 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1160 return self.parameterizedagg_sql(expression) 1161 1162 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1163 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1164 1165 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1166 return f"ON CLUSTER {self.sql(expression, 'this')}" 1167 1168 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1169 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1170 exp.Properties.Location.POST_NAME 1171 ): 1172 this_name = self.sql( 1173 expression.this if isinstance(expression.this, exp.Schema) else expression, 1174 "this", 1175 ) 1176 this_properties = " ".join( 1177 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1178 ) 1179 this_schema = self.schema_columns_sql(expression.this) 1180 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1181 1182 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1183 1184 return super().createable_sql(expression, locations) 1185 1186 def create_sql(self, expression: exp.Create) -> str: 1187 # The comment property comes last in CTAS statements, i.e. after the query 1188 query = expression.expression 1189 if isinstance(query, exp.Query): 1190 comment_prop = expression.find(exp.SchemaCommentProperty) 1191 if comment_prop: 1192 comment_prop.pop() 1193 query.replace(exp.paren(query)) 1194 else: 1195 comment_prop = None 1196 1197 create_sql = super().create_sql(expression) 1198 1199 comment_sql = self.sql(comment_prop) 1200 comment_sql = f" {comment_sql}" if comment_sql else "" 1201 1202 return f"{create_sql}{comment_sql}" 1203 1204 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1205 this = self.indent(self.sql(expression, "this")) 1206 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1207 1208 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1209 this = self.sql(expression, "this") 1210 this = f" {this}" if this else "" 1211 expr = self.sql(expression, "expression") 1212 expr = f" {expr}" if expr else "" 1213 index_type = self.sql(expression, "index_type") 1214 index_type = f" TYPE {index_type}" if index_type else "" 1215 granularity = self.sql(expression, "granularity") 1216 granularity = f" GRANULARITY {granularity}" if granularity else "" 1217 1218 return f"INDEX{this}{expr}{index_type}{granularity}" 1219 1220 def partition_sql(self, expression: exp.Partition) -> str: 1221 return f"PARTITION {self.expressions(expression, flat=True)}" 1222 1223 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1224 return f"ID {self.sql(expression.this)}" 1225 1226 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1227 return ( 1228 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1229 ) 1230 1231 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1232 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1233 1234 def is_sql(self, expression: exp.Is) -> str: 1235 is_sql = super().is_sql(expression) 1236 1237 if isinstance(expression.parent, exp.Not): 1238 # value IS NOT NULL -> NOT (value IS NULL) 1239 is_sql = self.wrap(is_sql) 1240 1241 return is_sql
162class ClickHouse(Dialect): 163 NORMALIZE_FUNCTIONS: bool | str = False 164 NULL_ORDERING = "nulls_are_last" 165 SUPPORTS_USER_DEFINED_TYPES = False 166 SAFE_DIVISION = True 167 LOG_BASE_FIRST: t.Optional[bool] = None 168 FORCE_EARLY_ALIAS_REF_EXPANSION = True 169 PRESERVE_ORIGINAL_NAMES = True 170 171 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 172 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 173 174 UNESCAPED_SEQUENCES = { 175 "\\0": "\0", 176 } 177 178 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 179 180 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 181 exp.Except: False, 182 exp.Intersect: False, 183 exp.Union: None, 184 } 185 186 class Tokenizer(tokens.Tokenizer): 187 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 188 IDENTIFIERS = ['"', "`"] 189 IDENTIFIER_ESCAPES = ["\\"] 190 STRING_ESCAPES = ["'", "\\"] 191 BIT_STRINGS = [("0b", "")] 192 HEX_STRINGS = [("0x", ""), ("0X", "")] 193 HEREDOC_STRINGS = ["$"] 194 195 KEYWORDS = { 196 **tokens.Tokenizer.KEYWORDS, 197 "ATTACH": TokenType.COMMAND, 198 "DATE32": TokenType.DATE32, 199 "DATETIME64": TokenType.DATETIME64, 200 "DICTIONARY": TokenType.DICTIONARY, 201 "ENUM8": TokenType.ENUM8, 202 "ENUM16": TokenType.ENUM16, 203 "FINAL": TokenType.FINAL, 204 "FIXEDSTRING": TokenType.FIXEDSTRING, 205 "FLOAT32": TokenType.FLOAT, 206 "FLOAT64": TokenType.DOUBLE, 207 "GLOBAL": TokenType.GLOBAL, 208 "INT256": TokenType.INT256, 209 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 210 "MAP": TokenType.MAP, 211 "NESTED": TokenType.NESTED, 212 "SAMPLE": TokenType.TABLE_SAMPLE, 213 "TUPLE": TokenType.STRUCT, 214 "UINT128": TokenType.UINT128, 215 "UINT16": TokenType.USMALLINT, 216 "UINT256": TokenType.UINT256, 217 "UINT32": TokenType.UINT, 218 "UINT64": TokenType.UBIGINT, 219 "UINT8": TokenType.UTINYINT, 220 "IPV4": TokenType.IPV4, 221 "IPV6": TokenType.IPV6, 222 "POINT": TokenType.POINT, 223 "RING": TokenType.RING, 224 "LINESTRING": TokenType.LINESTRING, 225 "MULTILINESTRING": TokenType.MULTILINESTRING, 226 "POLYGON": TokenType.POLYGON, 227 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 228 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 229 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 230 "SYSTEM": TokenType.COMMAND, 231 "PREWHERE": TokenType.PREWHERE, 232 } 233 KEYWORDS.pop("/*+") 234 235 SINGLE_TOKENS = { 236 **tokens.Tokenizer.SINGLE_TOKENS, 237 "$": TokenType.HEREDOC_STRING, 238 } 239 240 class Parser(parser.Parser): 241 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 242 # * select x from t1 union all select x from t2 limit 1; 243 # * select x from t1 union all (select x from t2 limit 1); 244 MODIFIERS_ATTACHED_TO_SET_OP = False 245 INTERVAL_SPANS = False 246 247 FUNCTIONS = { 248 **parser.Parser.FUNCTIONS, 249 "ANY": exp.AnyValue.from_arg_list, 250 "ARRAYSUM": exp.ArraySum.from_arg_list, 251 "COUNTIF": _build_count_if, 252 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 253 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 254 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 255 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 256 "DATE_FORMAT": _build_date_format, 257 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 258 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 259 "FORMATDATETIME": _build_date_format, 260 "JSONEXTRACTSTRING": build_json_extract_path( 261 exp.JSONExtractScalar, zero_based_indexing=False 262 ), 263 "MAP": parser.build_var_map, 264 "MATCH": exp.RegexpLike.from_arg_list, 265 "RANDCANONICAL": exp.Rand.from_arg_list, 266 "STR_TO_DATE": _build_str_to_date, 267 "TUPLE": exp.Struct.from_arg_list, 268 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 269 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 270 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 271 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 272 "UNIQ": exp.ApproxDistinct.from_arg_list, 273 "XOR": lambda args: exp.Xor(expressions=args), 274 "MD5": exp.MD5Digest.from_arg_list, 275 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 276 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 277 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 278 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 279 } 280 FUNCTIONS.pop("TRANSFORM") 281 282 AGG_FUNCTIONS = { 283 "count", 284 "min", 285 "max", 286 "sum", 287 "avg", 288 "any", 289 "stddevPop", 290 "stddevSamp", 291 "varPop", 292 "varSamp", 293 "corr", 294 "covarPop", 295 "covarSamp", 296 "entropy", 297 "exponentialMovingAverage", 298 "intervalLengthSum", 299 "kolmogorovSmirnovTest", 300 "mannWhitneyUTest", 301 "median", 302 "rankCorr", 303 "sumKahan", 304 "studentTTest", 305 "welchTTest", 306 "anyHeavy", 307 "anyLast", 308 "boundingRatio", 309 "first_value", 310 "last_value", 311 "argMin", 312 "argMax", 313 "avgWeighted", 314 "topK", 315 "topKWeighted", 316 "deltaSum", 317 "deltaSumTimestamp", 318 "groupArray", 319 "groupArrayLast", 320 "groupUniqArray", 321 "groupArrayInsertAt", 322 "groupArrayMovingAvg", 323 "groupArrayMovingSum", 324 "groupArraySample", 325 "groupBitAnd", 326 "groupBitOr", 327 "groupBitXor", 328 "groupBitmap", 329 "groupBitmapAnd", 330 "groupBitmapOr", 331 "groupBitmapXor", 332 "sumWithOverflow", 333 "sumMap", 334 "minMap", 335 "maxMap", 336 "skewSamp", 337 "skewPop", 338 "kurtSamp", 339 "kurtPop", 340 "uniq", 341 "uniqExact", 342 "uniqCombined", 343 "uniqCombined64", 344 "uniqHLL12", 345 "uniqTheta", 346 "quantile", 347 "quantiles", 348 "quantileExact", 349 "quantilesExact", 350 "quantileExactLow", 351 "quantilesExactLow", 352 "quantileExactHigh", 353 "quantilesExactHigh", 354 "quantileExactWeighted", 355 "quantilesExactWeighted", 356 "quantileTiming", 357 "quantilesTiming", 358 "quantileTimingWeighted", 359 "quantilesTimingWeighted", 360 "quantileDeterministic", 361 "quantilesDeterministic", 362 "quantileTDigest", 363 "quantilesTDigest", 364 "quantileTDigestWeighted", 365 "quantilesTDigestWeighted", 366 "quantileBFloat16", 367 "quantilesBFloat16", 368 "quantileBFloat16Weighted", 369 "quantilesBFloat16Weighted", 370 "simpleLinearRegression", 371 "stochasticLinearRegression", 372 "stochasticLogisticRegression", 373 "categoricalInformationValue", 374 "contingency", 375 "cramersV", 376 "cramersVBiasCorrected", 377 "theilsU", 378 "maxIntersections", 379 "maxIntersectionsPosition", 380 "meanZTest", 381 "quantileInterpolatedWeighted", 382 "quantilesInterpolatedWeighted", 383 "quantileGK", 384 "quantilesGK", 385 "sparkBar", 386 "sumCount", 387 "largestTriangleThreeBuckets", 388 "histogram", 389 "sequenceMatch", 390 "sequenceCount", 391 "windowFunnel", 392 "retention", 393 "uniqUpTo", 394 "sequenceNextNode", 395 "exponentialTimeDecayedAvg", 396 } 397 398 AGG_FUNCTIONS_SUFFIXES = [ 399 "If", 400 "Array", 401 "ArrayIf", 402 "Map", 403 "SimpleState", 404 "State", 405 "Merge", 406 "MergeState", 407 "ForEach", 408 "Distinct", 409 "OrDefault", 410 "OrNull", 411 "Resample", 412 "ArgMin", 413 "ArgMax", 414 ] 415 416 FUNC_TOKENS = { 417 *parser.Parser.FUNC_TOKENS, 418 TokenType.SET, 419 } 420 421 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 422 423 ID_VAR_TOKENS = { 424 *parser.Parser.ID_VAR_TOKENS, 425 TokenType.LIKE, 426 } 427 428 AGG_FUNC_MAPPING = ( 429 lambda functions, suffixes: { 430 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 431 } 432 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 433 434 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 435 436 FUNCTION_PARSERS = { 437 **parser.Parser.FUNCTION_PARSERS, 438 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 439 "QUANTILE": lambda self: self._parse_quantile(), 440 "MEDIAN": lambda self: self._parse_quantile(), 441 "COLUMNS": lambda self: self._parse_columns(), 442 } 443 444 FUNCTION_PARSERS.pop("MATCH") 445 446 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 447 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 448 449 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 450 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 451 452 RANGE_PARSERS = { 453 **parser.Parser.RANGE_PARSERS, 454 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 455 and self._parse_in(this, is_global=True), 456 } 457 458 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 459 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 460 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 461 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 462 463 JOIN_KINDS = { 464 *parser.Parser.JOIN_KINDS, 465 TokenType.ANY, 466 TokenType.ASOF, 467 TokenType.ARRAY, 468 } 469 470 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 471 TokenType.ANY, 472 TokenType.ARRAY, 473 TokenType.FINAL, 474 TokenType.FORMAT, 475 TokenType.SETTINGS, 476 } 477 478 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 479 TokenType.FORMAT, 480 } 481 482 LOG_DEFAULTS_TO_LN = True 483 484 QUERY_MODIFIER_PARSERS = { 485 **parser.Parser.QUERY_MODIFIER_PARSERS, 486 TokenType.SETTINGS: lambda self: ( 487 "settings", 488 self._advance() or self._parse_csv(self._parse_assignment), 489 ), 490 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 491 } 492 493 CONSTRAINT_PARSERS = { 494 **parser.Parser.CONSTRAINT_PARSERS, 495 "INDEX": lambda self: self._parse_index_constraint(), 496 "CODEC": lambda self: self._parse_compress(), 497 } 498 499 ALTER_PARSERS = { 500 **parser.Parser.ALTER_PARSERS, 501 "REPLACE": lambda self: self._parse_alter_table_replace(), 502 } 503 504 SCHEMA_UNNAMED_CONSTRAINTS = { 505 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 506 "INDEX", 507 } 508 509 PLACEHOLDER_PARSERS = { 510 **parser.Parser.PLACEHOLDER_PARSERS, 511 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 512 } 513 514 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 515 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 516 return self._parse_lambda() 517 518 def _parse_types( 519 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 520 ) -> t.Optional[exp.Expression]: 521 dtype = super()._parse_types( 522 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 523 ) 524 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 525 # Mark every type as non-nullable which is ClickHouse's default, unless it's 526 # already marked as nullable. This marker helps us transpile types from other 527 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 528 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 529 # fail in ClickHouse without the `Nullable` type constructor. 530 dtype.set("nullable", False) 531 532 return dtype 533 534 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 535 index = self._index 536 this = self._parse_bitwise() 537 if self._match(TokenType.FROM): 538 self._retreat(index) 539 return super()._parse_extract() 540 541 # We return Anonymous here because extract and regexpExtract have different semantics, 542 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 543 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 544 # 545 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 546 self._match(TokenType.COMMA) 547 return self.expression( 548 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 549 ) 550 551 def _parse_assignment(self) -> t.Optional[exp.Expression]: 552 this = super()._parse_assignment() 553 554 if self._match(TokenType.PLACEHOLDER): 555 return self.expression( 556 exp.If, 557 this=this, 558 true=self._parse_assignment(), 559 false=self._match(TokenType.COLON) and self._parse_assignment(), 560 ) 561 562 return this 563 564 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 565 """ 566 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 567 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 568 """ 569 this = self._parse_id_var() 570 self._match(TokenType.COLON) 571 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 572 self._match_text_seq("IDENTIFIER") and "Identifier" 573 ) 574 575 if not kind: 576 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 577 elif not self._match(TokenType.R_BRACE): 578 self.raise_error("Expecting }") 579 580 return self.expression(exp.Placeholder, this=this, kind=kind) 581 582 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 583 this = super()._parse_in(this) 584 this.set("is_global", is_global) 585 return this 586 587 def _parse_table( 588 self, 589 schema: bool = False, 590 joins: bool = False, 591 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 592 parse_bracket: bool = False, 593 is_db_reference: bool = False, 594 parse_partition: bool = False, 595 ) -> t.Optional[exp.Expression]: 596 this = super()._parse_table( 597 schema=schema, 598 joins=joins, 599 alias_tokens=alias_tokens, 600 parse_bracket=parse_bracket, 601 is_db_reference=is_db_reference, 602 ) 603 604 if self._match(TokenType.FINAL): 605 this = self.expression(exp.Final, this=this) 606 607 return this 608 609 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 610 return super()._parse_position(haystack_first=True) 611 612 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 613 def _parse_cte(self) -> exp.CTE: 614 # WITH <identifier> AS <subquery expression> 615 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 616 617 if not cte: 618 # WITH <expression> AS <identifier> 619 cte = self.expression( 620 exp.CTE, 621 this=self._parse_assignment(), 622 alias=self._parse_table_alias(), 623 scalar=True, 624 ) 625 626 return cte 627 628 def _parse_join_parts( 629 self, 630 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 631 is_global = self._match(TokenType.GLOBAL) and self._prev 632 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 633 634 if kind_pre: 635 kind = self._match_set(self.JOIN_KINDS) and self._prev 636 side = self._match_set(self.JOIN_SIDES) and self._prev 637 return is_global, side, kind 638 639 return ( 640 is_global, 641 self._match_set(self.JOIN_SIDES) and self._prev, 642 self._match_set(self.JOIN_KINDS) and self._prev, 643 ) 644 645 def _parse_join( 646 self, skip_join_token: bool = False, parse_bracket: bool = False 647 ) -> t.Optional[exp.Join]: 648 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 649 if join: 650 join.set("global", join.args.pop("method", None)) 651 652 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 653 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 654 if join.kind == "ARRAY": 655 for table in join.find_all(exp.Table): 656 table.replace(table.to_column()) 657 658 return join 659 660 def _parse_function( 661 self, 662 functions: t.Optional[t.Dict[str, t.Callable]] = None, 663 anonymous: bool = False, 664 optional_parens: bool = True, 665 any_token: bool = False, 666 ) -> t.Optional[exp.Expression]: 667 expr = super()._parse_function( 668 functions=functions, 669 anonymous=anonymous, 670 optional_parens=optional_parens, 671 any_token=any_token, 672 ) 673 674 func = expr.this if isinstance(expr, exp.Window) else expr 675 676 # Aggregate functions can be split in 2 parts: <func_name><suffix> 677 parts = ( 678 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 679 ) 680 681 if parts: 682 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 683 params = self._parse_func_params(anon_func) 684 685 kwargs = { 686 "this": anon_func.this, 687 "expressions": anon_func.expressions, 688 } 689 if parts[1]: 690 kwargs["parts"] = parts 691 exp_class: t.Type[exp.Expression] = ( 692 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 693 ) 694 else: 695 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 696 697 kwargs["exp_class"] = exp_class 698 if params: 699 kwargs["params"] = params 700 701 func = self.expression(**kwargs) 702 703 if isinstance(expr, exp.Window): 704 # The window's func was parsed as Anonymous in base parser, fix its 705 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 706 expr.set("this", func) 707 elif params: 708 # Params have blocked super()._parse_function() from parsing the following window 709 # (if that exists) as they're standing between the function call and the window spec 710 expr = self._parse_window(func) 711 else: 712 expr = func 713 714 return expr 715 716 def _parse_func_params( 717 self, this: t.Optional[exp.Func] = None 718 ) -> t.Optional[t.List[exp.Expression]]: 719 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 720 return self._parse_csv(self._parse_lambda) 721 722 if self._match(TokenType.L_PAREN): 723 params = self._parse_csv(self._parse_lambda) 724 self._match_r_paren(this) 725 return params 726 727 return None 728 729 def _parse_quantile(self) -> exp.Quantile: 730 this = self._parse_lambda() 731 params = self._parse_func_params() 732 if params: 733 return self.expression(exp.Quantile, this=params[0], quantile=this) 734 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 735 736 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 737 return super()._parse_wrapped_id_vars(optional=True) 738 739 def _parse_primary_key( 740 self, wrapped_optional: bool = False, in_props: bool = False 741 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 742 return super()._parse_primary_key( 743 wrapped_optional=wrapped_optional or in_props, in_props=in_props 744 ) 745 746 def _parse_on_property(self) -> t.Optional[exp.Expression]: 747 index = self._index 748 if self._match_text_seq("CLUSTER"): 749 this = self._parse_id_var() 750 if this: 751 return self.expression(exp.OnCluster, this=this) 752 else: 753 self._retreat(index) 754 return None 755 756 def _parse_index_constraint( 757 self, kind: t.Optional[str] = None 758 ) -> exp.IndexColumnConstraint: 759 # INDEX name1 expr TYPE type1(args) GRANULARITY value 760 this = self._parse_id_var() 761 expression = self._parse_assignment() 762 763 index_type = self._match_text_seq("TYPE") and ( 764 self._parse_function() or self._parse_var() 765 ) 766 767 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 768 769 return self.expression( 770 exp.IndexColumnConstraint, 771 this=this, 772 expression=expression, 773 index_type=index_type, 774 granularity=granularity, 775 ) 776 777 def _parse_partition(self) -> t.Optional[exp.Partition]: 778 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 779 if not self._match(TokenType.PARTITION): 780 return None 781 782 if self._match_text_seq("ID"): 783 # Corresponds to the PARTITION ID <string_value> syntax 784 expressions: t.List[exp.Expression] = [ 785 self.expression(exp.PartitionId, this=self._parse_string()) 786 ] 787 else: 788 expressions = self._parse_expressions() 789 790 return self.expression(exp.Partition, expressions=expressions) 791 792 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 793 partition = self._parse_partition() 794 795 if not partition or not self._match(TokenType.FROM): 796 return None 797 798 return self.expression( 799 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 800 ) 801 802 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 803 if not self._match_text_seq("PROJECTION"): 804 return None 805 806 return self.expression( 807 exp.ProjectionDef, 808 this=self._parse_id_var(), 809 expression=self._parse_wrapped(self._parse_statement), 810 ) 811 812 def _parse_constraint(self) -> t.Optional[exp.Expression]: 813 return super()._parse_constraint() or self._parse_projection_def() 814 815 def _parse_alias( 816 self, this: t.Optional[exp.Expression], explicit: bool = False 817 ) -> t.Optional[exp.Expression]: 818 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 819 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 820 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 821 return this 822 823 return super()._parse_alias(this=this, explicit=explicit) 824 825 def _parse_expression(self) -> t.Optional[exp.Expression]: 826 this = super()._parse_expression() 827 828 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 829 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 830 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 831 self._match(TokenType.R_PAREN) 832 833 return this 834 835 def _parse_columns(self) -> exp.Expression: 836 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 837 838 while self._next and self._match_text_seq(")", "APPLY", "("): 839 self._match(TokenType.R_PAREN) 840 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 841 return this 842 843 class Generator(generator.Generator): 844 QUERY_HINTS = False 845 STRUCT_DELIMITER = ("(", ")") 846 NVL2_SUPPORTED = False 847 TABLESAMPLE_REQUIRES_PARENS = False 848 TABLESAMPLE_SIZE_IS_ROWS = False 849 TABLESAMPLE_KEYWORDS = "SAMPLE" 850 LAST_DAY_SUPPORTS_DATE_PART = False 851 CAN_IMPLEMENT_ARRAY_ANY = True 852 SUPPORTS_TO_NUMBER = False 853 JOIN_HINTS = False 854 TABLE_HINTS = False 855 GROUPINGS_SEP = "" 856 SET_OP_MODIFIERS = False 857 SUPPORTS_TABLE_ALIAS_COLUMNS = False 858 VALUES_AS_TABLE = False 859 ARRAY_SIZE_NAME = "LENGTH" 860 861 STRING_TYPE_MAPPING = { 862 exp.DataType.Type.CHAR: "String", 863 exp.DataType.Type.LONGBLOB: "String", 864 exp.DataType.Type.LONGTEXT: "String", 865 exp.DataType.Type.MEDIUMBLOB: "String", 866 exp.DataType.Type.MEDIUMTEXT: "String", 867 exp.DataType.Type.TINYBLOB: "String", 868 exp.DataType.Type.TINYTEXT: "String", 869 exp.DataType.Type.TEXT: "String", 870 exp.DataType.Type.VARBINARY: "String", 871 exp.DataType.Type.VARCHAR: "String", 872 } 873 874 SUPPORTED_JSON_PATH_PARTS = { 875 exp.JSONPathKey, 876 exp.JSONPathRoot, 877 exp.JSONPathSubscript, 878 } 879 880 TYPE_MAPPING = { 881 **generator.Generator.TYPE_MAPPING, 882 **STRING_TYPE_MAPPING, 883 exp.DataType.Type.ARRAY: "Array", 884 exp.DataType.Type.BOOLEAN: "Bool", 885 exp.DataType.Type.BIGINT: "Int64", 886 exp.DataType.Type.DATE32: "Date32", 887 exp.DataType.Type.DATETIME: "DateTime", 888 exp.DataType.Type.DATETIME2: "DateTime", 889 exp.DataType.Type.SMALLDATETIME: "DateTime", 890 exp.DataType.Type.DATETIME64: "DateTime64", 891 exp.DataType.Type.DECIMAL: "Decimal", 892 exp.DataType.Type.DECIMAL32: "Decimal32", 893 exp.DataType.Type.DECIMAL64: "Decimal64", 894 exp.DataType.Type.DECIMAL128: "Decimal128", 895 exp.DataType.Type.DECIMAL256: "Decimal256", 896 exp.DataType.Type.TIMESTAMP: "DateTime", 897 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 898 exp.DataType.Type.DOUBLE: "Float64", 899 exp.DataType.Type.ENUM: "Enum", 900 exp.DataType.Type.ENUM8: "Enum8", 901 exp.DataType.Type.ENUM16: "Enum16", 902 exp.DataType.Type.FIXEDSTRING: "FixedString", 903 exp.DataType.Type.FLOAT: "Float32", 904 exp.DataType.Type.INT: "Int32", 905 exp.DataType.Type.MEDIUMINT: "Int32", 906 exp.DataType.Type.INT128: "Int128", 907 exp.DataType.Type.INT256: "Int256", 908 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 909 exp.DataType.Type.MAP: "Map", 910 exp.DataType.Type.NESTED: "Nested", 911 exp.DataType.Type.SMALLINT: "Int16", 912 exp.DataType.Type.STRUCT: "Tuple", 913 exp.DataType.Type.TINYINT: "Int8", 914 exp.DataType.Type.UBIGINT: "UInt64", 915 exp.DataType.Type.UINT: "UInt32", 916 exp.DataType.Type.UINT128: "UInt128", 917 exp.DataType.Type.UINT256: "UInt256", 918 exp.DataType.Type.USMALLINT: "UInt16", 919 exp.DataType.Type.UTINYINT: "UInt8", 920 exp.DataType.Type.IPV4: "IPv4", 921 exp.DataType.Type.IPV6: "IPv6", 922 exp.DataType.Type.POINT: "Point", 923 exp.DataType.Type.RING: "Ring", 924 exp.DataType.Type.LINESTRING: "LineString", 925 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 926 exp.DataType.Type.POLYGON: "Polygon", 927 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 928 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 929 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 930 } 931 932 TRANSFORMS = { 933 **generator.Generator.TRANSFORMS, 934 exp.AnyValue: rename_func("any"), 935 exp.ApproxDistinct: rename_func("uniq"), 936 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 937 exp.ArraySum: rename_func("arraySum"), 938 exp.ArgMax: arg_max_or_min_no_count("argMax"), 939 exp.ArgMin: arg_max_or_min_no_count("argMin"), 940 exp.Array: inline_array_sql, 941 exp.CastToStrType: rename_func("CAST"), 942 exp.CountIf: rename_func("countIf"), 943 exp.CompressColumnConstraint: lambda self, 944 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 945 exp.ComputedColumnConstraint: lambda self, 946 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 947 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 948 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 949 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 950 exp.DateStrToDate: rename_func("toDate"), 951 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 952 exp.Explode: rename_func("arrayJoin"), 953 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 954 exp.IsNan: rename_func("isNaN"), 955 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 956 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 957 exp.JSONPathKey: json_path_key_only_name, 958 exp.JSONPathRoot: lambda *_: "", 959 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 960 exp.Median: rename_func("median"), 961 exp.Nullif: rename_func("nullIf"), 962 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 963 exp.Pivot: no_pivot_sql, 964 exp.Quantile: _quantile_sql, 965 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 966 exp.Rand: rename_func("randCanonical"), 967 exp.StartsWith: rename_func("startsWith"), 968 exp.StrPosition: lambda self, e: self.func( 969 "position", e.this, e.args.get("substr"), e.args.get("position") 970 ), 971 exp.TimeToStr: lambda self, e: self.func( 972 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 973 ), 974 exp.TimeStrToTime: _timestrtotime_sql, 975 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 976 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 977 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 978 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 979 exp.MD5Digest: rename_func("MD5"), 980 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 981 exp.SHA: rename_func("SHA1"), 982 exp.SHA2: sha256_sql, 983 exp.UnixToTime: _unix_to_time_sql, 984 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 985 exp.Trim: trim_sql, 986 exp.Variance: rename_func("varSamp"), 987 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 988 exp.Stddev: rename_func("stddevSamp"), 989 exp.Chr: rename_func("CHAR"), 990 exp.Lag: lambda self, e: self.func( 991 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 992 ), 993 exp.Lead: lambda self, e: self.func( 994 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 995 ), 996 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 997 rename_func("editDistance") 998 ), 999 } 1000 1001 PROPERTIES_LOCATION = { 1002 **generator.Generator.PROPERTIES_LOCATION, 1003 exp.OnCluster: exp.Properties.Location.POST_NAME, 1004 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1005 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1006 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1007 } 1008 1009 # There's no list in docs, but it can be found in Clickhouse code 1010 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1011 ON_CLUSTER_TARGETS = { 1012 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1013 "DATABASE", 1014 "TABLE", 1015 "VIEW", 1016 "DICTIONARY", 1017 "INDEX", 1018 "FUNCTION", 1019 "NAMED COLLECTION", 1020 } 1021 1022 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1023 NON_NULLABLE_TYPES = { 1024 exp.DataType.Type.ARRAY, 1025 exp.DataType.Type.MAP, 1026 exp.DataType.Type.STRUCT, 1027 exp.DataType.Type.POINT, 1028 exp.DataType.Type.RING, 1029 exp.DataType.Type.LINESTRING, 1030 exp.DataType.Type.MULTILINESTRING, 1031 exp.DataType.Type.POLYGON, 1032 exp.DataType.Type.MULTIPOLYGON, 1033 } 1034 1035 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1036 strtodate_sql = self.function_fallback_sql(expression) 1037 1038 if not isinstance(expression.parent, exp.Cast): 1039 # StrToDate returns DATEs in other dialects (eg. postgres), so 1040 # this branch aims to improve the transpilation to clickhouse 1041 return f"CAST({strtodate_sql} AS DATE)" 1042 1043 return strtodate_sql 1044 1045 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1046 this = expression.this 1047 1048 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1049 return self.sql(this) 1050 1051 return super().cast_sql(expression, safe_prefix=safe_prefix) 1052 1053 def trycast_sql(self, expression: exp.TryCast) -> str: 1054 dtype = expression.to 1055 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1056 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1057 dtype.set("nullable", True) 1058 1059 return super().cast_sql(expression) 1060 1061 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1062 this = self.json_path_part(expression.this) 1063 return str(int(this) + 1) if is_int(this) else this 1064 1065 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1066 return f"AS {self.sql(expression, 'this')}" 1067 1068 def _any_to_has( 1069 self, 1070 expression: exp.EQ | exp.NEQ, 1071 default: t.Callable[[t.Any], str], 1072 prefix: str = "", 1073 ) -> str: 1074 if isinstance(expression.left, exp.Any): 1075 arr = expression.left 1076 this = expression.right 1077 elif isinstance(expression.right, exp.Any): 1078 arr = expression.right 1079 this = expression.left 1080 else: 1081 return default(expression) 1082 1083 return prefix + self.func("has", arr.this.unnest(), this) 1084 1085 def eq_sql(self, expression: exp.EQ) -> str: 1086 return self._any_to_has(expression, super().eq_sql) 1087 1088 def neq_sql(self, expression: exp.NEQ) -> str: 1089 return self._any_to_has(expression, super().neq_sql, "NOT ") 1090 1091 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1092 # Manually add a flag to make the search case-insensitive 1093 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1094 return self.func("match", expression.this, regex) 1095 1096 def datatype_sql(self, expression: exp.DataType) -> str: 1097 # String is the standard ClickHouse type, every other variant is just an alias. 1098 # Additionally, any supplied length parameter will be ignored. 1099 # 1100 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1101 if expression.this in self.STRING_TYPE_MAPPING: 1102 dtype = "String" 1103 else: 1104 dtype = super().datatype_sql(expression) 1105 1106 # This section changes the type to `Nullable(...)` if the following conditions hold: 1107 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1108 # and change their semantics 1109 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1110 # constraint: "Type of Map key must be a type, that can be represented by integer or 1111 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1112 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1113 parent = expression.parent 1114 nullable = expression.args.get("nullable") 1115 if nullable is True or ( 1116 nullable is None 1117 and not ( 1118 isinstance(parent, exp.DataType) 1119 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1120 and expression.index in (None, 0) 1121 ) 1122 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1123 ): 1124 dtype = f"Nullable({dtype})" 1125 1126 return dtype 1127 1128 def cte_sql(self, expression: exp.CTE) -> str: 1129 if expression.args.get("scalar"): 1130 this = self.sql(expression, "this") 1131 alias = self.sql(expression, "alias") 1132 return f"{this} AS {alias}" 1133 1134 return super().cte_sql(expression) 1135 1136 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1137 return super().after_limit_modifiers(expression) + [ 1138 ( 1139 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1140 if expression.args.get("settings") 1141 else "" 1142 ), 1143 ( 1144 self.seg("FORMAT ") + self.sql(expression, "format") 1145 if expression.args.get("format") 1146 else "" 1147 ), 1148 ] 1149 1150 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1151 params = self.expressions(expression, key="params", flat=True) 1152 return self.func(expression.name, *expression.expressions) + f"({params})" 1153 1154 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1155 return self.func(expression.name, *expression.expressions) 1156 1157 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1158 return self.anonymousaggfunc_sql(expression) 1159 1160 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1161 return self.parameterizedagg_sql(expression) 1162 1163 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1164 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1165 1166 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1167 return f"ON CLUSTER {self.sql(expression, 'this')}" 1168 1169 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1170 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1171 exp.Properties.Location.POST_NAME 1172 ): 1173 this_name = self.sql( 1174 expression.this if isinstance(expression.this, exp.Schema) else expression, 1175 "this", 1176 ) 1177 this_properties = " ".join( 1178 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1179 ) 1180 this_schema = self.schema_columns_sql(expression.this) 1181 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1182 1183 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1184 1185 return super().createable_sql(expression, locations) 1186 1187 def create_sql(self, expression: exp.Create) -> str: 1188 # The comment property comes last in CTAS statements, i.e. after the query 1189 query = expression.expression 1190 if isinstance(query, exp.Query): 1191 comment_prop = expression.find(exp.SchemaCommentProperty) 1192 if comment_prop: 1193 comment_prop.pop() 1194 query.replace(exp.paren(query)) 1195 else: 1196 comment_prop = None 1197 1198 create_sql = super().create_sql(expression) 1199 1200 comment_sql = self.sql(comment_prop) 1201 comment_sql = f" {comment_sql}" if comment_sql else "" 1202 1203 return f"{create_sql}{comment_sql}" 1204 1205 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1206 this = self.indent(self.sql(expression, "this")) 1207 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1208 1209 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1210 this = self.sql(expression, "this") 1211 this = f" {this}" if this else "" 1212 expr = self.sql(expression, "expression") 1213 expr = f" {expr}" if expr else "" 1214 index_type = self.sql(expression, "index_type") 1215 index_type = f" TYPE {index_type}" if index_type else "" 1216 granularity = self.sql(expression, "granularity") 1217 granularity = f" GRANULARITY {granularity}" if granularity else "" 1218 1219 return f"INDEX{this}{expr}{index_type}{granularity}" 1220 1221 def partition_sql(self, expression: exp.Partition) -> str: 1222 return f"PARTITION {self.expressions(expression, flat=True)}" 1223 1224 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1225 return f"ID {self.sql(expression.this)}" 1226 1227 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1228 return ( 1229 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1230 ) 1231 1232 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1233 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1234 1235 def is_sql(self, expression: exp.Is) -> str: 1236 is_sql = super().is_sql(expression) 1237 1238 if isinstance(expression.parent, exp.Not): 1239 # value IS NOT NULL -> NOT (value IS NULL) 1240 is_sql = self.wrap(is_sql) 1241 1242 return is_sql
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Whether the base comes first in the LOG
function.
Possible values: True
, False
, None
(two arguments are not supported by LOG
)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id, HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Whether the name of the function should be preserved inside the node's metadata, can be useful for roundtripping deprecated vs new functions that share an AST node e.g JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
Whether a set operation uses DISTINCT by default. This is None
when either DISTINCT
or ALL
must be explicitly specified.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
186 class Tokenizer(tokens.Tokenizer): 187 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 188 IDENTIFIERS = ['"', "`"] 189 IDENTIFIER_ESCAPES = ["\\"] 190 STRING_ESCAPES = ["'", "\\"] 191 BIT_STRINGS = [("0b", "")] 192 HEX_STRINGS = [("0x", ""), ("0X", "")] 193 HEREDOC_STRINGS = ["$"] 194 195 KEYWORDS = { 196 **tokens.Tokenizer.KEYWORDS, 197 "ATTACH": TokenType.COMMAND, 198 "DATE32": TokenType.DATE32, 199 "DATETIME64": TokenType.DATETIME64, 200 "DICTIONARY": TokenType.DICTIONARY, 201 "ENUM8": TokenType.ENUM8, 202 "ENUM16": TokenType.ENUM16, 203 "FINAL": TokenType.FINAL, 204 "FIXEDSTRING": TokenType.FIXEDSTRING, 205 "FLOAT32": TokenType.FLOAT, 206 "FLOAT64": TokenType.DOUBLE, 207 "GLOBAL": TokenType.GLOBAL, 208 "INT256": TokenType.INT256, 209 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 210 "MAP": TokenType.MAP, 211 "NESTED": TokenType.NESTED, 212 "SAMPLE": TokenType.TABLE_SAMPLE, 213 "TUPLE": TokenType.STRUCT, 214 "UINT128": TokenType.UINT128, 215 "UINT16": TokenType.USMALLINT, 216 "UINT256": TokenType.UINT256, 217 "UINT32": TokenType.UINT, 218 "UINT64": TokenType.UBIGINT, 219 "UINT8": TokenType.UTINYINT, 220 "IPV4": TokenType.IPV4, 221 "IPV6": TokenType.IPV6, 222 "POINT": TokenType.POINT, 223 "RING": TokenType.RING, 224 "LINESTRING": TokenType.LINESTRING, 225 "MULTILINESTRING": TokenType.MULTILINESTRING, 226 "POLYGON": TokenType.POLYGON, 227 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 228 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 229 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 230 "SYSTEM": TokenType.COMMAND, 231 "PREWHERE": TokenType.PREWHERE, 232 } 233 KEYWORDS.pop("/*+") 234 235 SINGLE_TOKENS = { 236 **tokens.Tokenizer.SINGLE_TOKENS, 237 "$": TokenType.HEREDOC_STRING, 238 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
240 class Parser(parser.Parser): 241 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 242 # * select x from t1 union all select x from t2 limit 1; 243 # * select x from t1 union all (select x from t2 limit 1); 244 MODIFIERS_ATTACHED_TO_SET_OP = False 245 INTERVAL_SPANS = False 246 247 FUNCTIONS = { 248 **parser.Parser.FUNCTIONS, 249 "ANY": exp.AnyValue.from_arg_list, 250 "ARRAYSUM": exp.ArraySum.from_arg_list, 251 "COUNTIF": _build_count_if, 252 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 253 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 254 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 255 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 256 "DATE_FORMAT": _build_date_format, 257 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 258 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 259 "FORMATDATETIME": _build_date_format, 260 "JSONEXTRACTSTRING": build_json_extract_path( 261 exp.JSONExtractScalar, zero_based_indexing=False 262 ), 263 "MAP": parser.build_var_map, 264 "MATCH": exp.RegexpLike.from_arg_list, 265 "RANDCANONICAL": exp.Rand.from_arg_list, 266 "STR_TO_DATE": _build_str_to_date, 267 "TUPLE": exp.Struct.from_arg_list, 268 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 269 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 270 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 271 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 272 "UNIQ": exp.ApproxDistinct.from_arg_list, 273 "XOR": lambda args: exp.Xor(expressions=args), 274 "MD5": exp.MD5Digest.from_arg_list, 275 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 276 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 277 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 278 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 279 } 280 FUNCTIONS.pop("TRANSFORM") 281 282 AGG_FUNCTIONS = { 283 "count", 284 "min", 285 "max", 286 "sum", 287 "avg", 288 "any", 289 "stddevPop", 290 "stddevSamp", 291 "varPop", 292 "varSamp", 293 "corr", 294 "covarPop", 295 "covarSamp", 296 "entropy", 297 "exponentialMovingAverage", 298 "intervalLengthSum", 299 "kolmogorovSmirnovTest", 300 "mannWhitneyUTest", 301 "median", 302 "rankCorr", 303 "sumKahan", 304 "studentTTest", 305 "welchTTest", 306 "anyHeavy", 307 "anyLast", 308 "boundingRatio", 309 "first_value", 310 "last_value", 311 "argMin", 312 "argMax", 313 "avgWeighted", 314 "topK", 315 "topKWeighted", 316 "deltaSum", 317 "deltaSumTimestamp", 318 "groupArray", 319 "groupArrayLast", 320 "groupUniqArray", 321 "groupArrayInsertAt", 322 "groupArrayMovingAvg", 323 "groupArrayMovingSum", 324 "groupArraySample", 325 "groupBitAnd", 326 "groupBitOr", 327 "groupBitXor", 328 "groupBitmap", 329 "groupBitmapAnd", 330 "groupBitmapOr", 331 "groupBitmapXor", 332 "sumWithOverflow", 333 "sumMap", 334 "minMap", 335 "maxMap", 336 "skewSamp", 337 "skewPop", 338 "kurtSamp", 339 "kurtPop", 340 "uniq", 341 "uniqExact", 342 "uniqCombined", 343 "uniqCombined64", 344 "uniqHLL12", 345 "uniqTheta", 346 "quantile", 347 "quantiles", 348 "quantileExact", 349 "quantilesExact", 350 "quantileExactLow", 351 "quantilesExactLow", 352 "quantileExactHigh", 353 "quantilesExactHigh", 354 "quantileExactWeighted", 355 "quantilesExactWeighted", 356 "quantileTiming", 357 "quantilesTiming", 358 "quantileTimingWeighted", 359 "quantilesTimingWeighted", 360 "quantileDeterministic", 361 "quantilesDeterministic", 362 "quantileTDigest", 363 "quantilesTDigest", 364 "quantileTDigestWeighted", 365 "quantilesTDigestWeighted", 366 "quantileBFloat16", 367 "quantilesBFloat16", 368 "quantileBFloat16Weighted", 369 "quantilesBFloat16Weighted", 370 "simpleLinearRegression", 371 "stochasticLinearRegression", 372 "stochasticLogisticRegression", 373 "categoricalInformationValue", 374 "contingency", 375 "cramersV", 376 "cramersVBiasCorrected", 377 "theilsU", 378 "maxIntersections", 379 "maxIntersectionsPosition", 380 "meanZTest", 381 "quantileInterpolatedWeighted", 382 "quantilesInterpolatedWeighted", 383 "quantileGK", 384 "quantilesGK", 385 "sparkBar", 386 "sumCount", 387 "largestTriangleThreeBuckets", 388 "histogram", 389 "sequenceMatch", 390 "sequenceCount", 391 "windowFunnel", 392 "retention", 393 "uniqUpTo", 394 "sequenceNextNode", 395 "exponentialTimeDecayedAvg", 396 } 397 398 AGG_FUNCTIONS_SUFFIXES = [ 399 "If", 400 "Array", 401 "ArrayIf", 402 "Map", 403 "SimpleState", 404 "State", 405 "Merge", 406 "MergeState", 407 "ForEach", 408 "Distinct", 409 "OrDefault", 410 "OrNull", 411 "Resample", 412 "ArgMin", 413 "ArgMax", 414 ] 415 416 FUNC_TOKENS = { 417 *parser.Parser.FUNC_TOKENS, 418 TokenType.SET, 419 } 420 421 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 422 423 ID_VAR_TOKENS = { 424 *parser.Parser.ID_VAR_TOKENS, 425 TokenType.LIKE, 426 } 427 428 AGG_FUNC_MAPPING = ( 429 lambda functions, suffixes: { 430 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 431 } 432 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 433 434 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 435 436 FUNCTION_PARSERS = { 437 **parser.Parser.FUNCTION_PARSERS, 438 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 439 "QUANTILE": lambda self: self._parse_quantile(), 440 "MEDIAN": lambda self: self._parse_quantile(), 441 "COLUMNS": lambda self: self._parse_columns(), 442 } 443 444 FUNCTION_PARSERS.pop("MATCH") 445 446 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 447 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 448 449 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 450 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 451 452 RANGE_PARSERS = { 453 **parser.Parser.RANGE_PARSERS, 454 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 455 and self._parse_in(this, is_global=True), 456 } 457 458 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 459 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 460 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 461 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 462 463 JOIN_KINDS = { 464 *parser.Parser.JOIN_KINDS, 465 TokenType.ANY, 466 TokenType.ASOF, 467 TokenType.ARRAY, 468 } 469 470 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 471 TokenType.ANY, 472 TokenType.ARRAY, 473 TokenType.FINAL, 474 TokenType.FORMAT, 475 TokenType.SETTINGS, 476 } 477 478 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 479 TokenType.FORMAT, 480 } 481 482 LOG_DEFAULTS_TO_LN = True 483 484 QUERY_MODIFIER_PARSERS = { 485 **parser.Parser.QUERY_MODIFIER_PARSERS, 486 TokenType.SETTINGS: lambda self: ( 487 "settings", 488 self._advance() or self._parse_csv(self._parse_assignment), 489 ), 490 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 491 } 492 493 CONSTRAINT_PARSERS = { 494 **parser.Parser.CONSTRAINT_PARSERS, 495 "INDEX": lambda self: self._parse_index_constraint(), 496 "CODEC": lambda self: self._parse_compress(), 497 } 498 499 ALTER_PARSERS = { 500 **parser.Parser.ALTER_PARSERS, 501 "REPLACE": lambda self: self._parse_alter_table_replace(), 502 } 503 504 SCHEMA_UNNAMED_CONSTRAINTS = { 505 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 506 "INDEX", 507 } 508 509 PLACEHOLDER_PARSERS = { 510 **parser.Parser.PLACEHOLDER_PARSERS, 511 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 512 } 513 514 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 515 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 516 return self._parse_lambda() 517 518 def _parse_types( 519 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 520 ) -> t.Optional[exp.Expression]: 521 dtype = super()._parse_types( 522 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 523 ) 524 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 525 # Mark every type as non-nullable which is ClickHouse's default, unless it's 526 # already marked as nullable. This marker helps us transpile types from other 527 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 528 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 529 # fail in ClickHouse without the `Nullable` type constructor. 530 dtype.set("nullable", False) 531 532 return dtype 533 534 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 535 index = self._index 536 this = self._parse_bitwise() 537 if self._match(TokenType.FROM): 538 self._retreat(index) 539 return super()._parse_extract() 540 541 # We return Anonymous here because extract and regexpExtract have different semantics, 542 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 543 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 544 # 545 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 546 self._match(TokenType.COMMA) 547 return self.expression( 548 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 549 ) 550 551 def _parse_assignment(self) -> t.Optional[exp.Expression]: 552 this = super()._parse_assignment() 553 554 if self._match(TokenType.PLACEHOLDER): 555 return self.expression( 556 exp.If, 557 this=this, 558 true=self._parse_assignment(), 559 false=self._match(TokenType.COLON) and self._parse_assignment(), 560 ) 561 562 return this 563 564 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 565 """ 566 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 567 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 568 """ 569 this = self._parse_id_var() 570 self._match(TokenType.COLON) 571 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 572 self._match_text_seq("IDENTIFIER") and "Identifier" 573 ) 574 575 if not kind: 576 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 577 elif not self._match(TokenType.R_BRACE): 578 self.raise_error("Expecting }") 579 580 return self.expression(exp.Placeholder, this=this, kind=kind) 581 582 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 583 this = super()._parse_in(this) 584 this.set("is_global", is_global) 585 return this 586 587 def _parse_table( 588 self, 589 schema: bool = False, 590 joins: bool = False, 591 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 592 parse_bracket: bool = False, 593 is_db_reference: bool = False, 594 parse_partition: bool = False, 595 ) -> t.Optional[exp.Expression]: 596 this = super()._parse_table( 597 schema=schema, 598 joins=joins, 599 alias_tokens=alias_tokens, 600 parse_bracket=parse_bracket, 601 is_db_reference=is_db_reference, 602 ) 603 604 if self._match(TokenType.FINAL): 605 this = self.expression(exp.Final, this=this) 606 607 return this 608 609 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 610 return super()._parse_position(haystack_first=True) 611 612 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 613 def _parse_cte(self) -> exp.CTE: 614 # WITH <identifier> AS <subquery expression> 615 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 616 617 if not cte: 618 # WITH <expression> AS <identifier> 619 cte = self.expression( 620 exp.CTE, 621 this=self._parse_assignment(), 622 alias=self._parse_table_alias(), 623 scalar=True, 624 ) 625 626 return cte 627 628 def _parse_join_parts( 629 self, 630 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 631 is_global = self._match(TokenType.GLOBAL) and self._prev 632 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 633 634 if kind_pre: 635 kind = self._match_set(self.JOIN_KINDS) and self._prev 636 side = self._match_set(self.JOIN_SIDES) and self._prev 637 return is_global, side, kind 638 639 return ( 640 is_global, 641 self._match_set(self.JOIN_SIDES) and self._prev, 642 self._match_set(self.JOIN_KINDS) and self._prev, 643 ) 644 645 def _parse_join( 646 self, skip_join_token: bool = False, parse_bracket: bool = False 647 ) -> t.Optional[exp.Join]: 648 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 649 if join: 650 join.set("global", join.args.pop("method", None)) 651 652 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 653 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 654 if join.kind == "ARRAY": 655 for table in join.find_all(exp.Table): 656 table.replace(table.to_column()) 657 658 return join 659 660 def _parse_function( 661 self, 662 functions: t.Optional[t.Dict[str, t.Callable]] = None, 663 anonymous: bool = False, 664 optional_parens: bool = True, 665 any_token: bool = False, 666 ) -> t.Optional[exp.Expression]: 667 expr = super()._parse_function( 668 functions=functions, 669 anonymous=anonymous, 670 optional_parens=optional_parens, 671 any_token=any_token, 672 ) 673 674 func = expr.this if isinstance(expr, exp.Window) else expr 675 676 # Aggregate functions can be split in 2 parts: <func_name><suffix> 677 parts = ( 678 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 679 ) 680 681 if parts: 682 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 683 params = self._parse_func_params(anon_func) 684 685 kwargs = { 686 "this": anon_func.this, 687 "expressions": anon_func.expressions, 688 } 689 if parts[1]: 690 kwargs["parts"] = parts 691 exp_class: t.Type[exp.Expression] = ( 692 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 693 ) 694 else: 695 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 696 697 kwargs["exp_class"] = exp_class 698 if params: 699 kwargs["params"] = params 700 701 func = self.expression(**kwargs) 702 703 if isinstance(expr, exp.Window): 704 # The window's func was parsed as Anonymous in base parser, fix its 705 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 706 expr.set("this", func) 707 elif params: 708 # Params have blocked super()._parse_function() from parsing the following window 709 # (if that exists) as they're standing between the function call and the window spec 710 expr = self._parse_window(func) 711 else: 712 expr = func 713 714 return expr 715 716 def _parse_func_params( 717 self, this: t.Optional[exp.Func] = None 718 ) -> t.Optional[t.List[exp.Expression]]: 719 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 720 return self._parse_csv(self._parse_lambda) 721 722 if self._match(TokenType.L_PAREN): 723 params = self._parse_csv(self._parse_lambda) 724 self._match_r_paren(this) 725 return params 726 727 return None 728 729 def _parse_quantile(self) -> exp.Quantile: 730 this = self._parse_lambda() 731 params = self._parse_func_params() 732 if params: 733 return self.expression(exp.Quantile, this=params[0], quantile=this) 734 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 735 736 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 737 return super()._parse_wrapped_id_vars(optional=True) 738 739 def _parse_primary_key( 740 self, wrapped_optional: bool = False, in_props: bool = False 741 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 742 return super()._parse_primary_key( 743 wrapped_optional=wrapped_optional or in_props, in_props=in_props 744 ) 745 746 def _parse_on_property(self) -> t.Optional[exp.Expression]: 747 index = self._index 748 if self._match_text_seq("CLUSTER"): 749 this = self._parse_id_var() 750 if this: 751 return self.expression(exp.OnCluster, this=this) 752 else: 753 self._retreat(index) 754 return None 755 756 def _parse_index_constraint( 757 self, kind: t.Optional[str] = None 758 ) -> exp.IndexColumnConstraint: 759 # INDEX name1 expr TYPE type1(args) GRANULARITY value 760 this = self._parse_id_var() 761 expression = self._parse_assignment() 762 763 index_type = self._match_text_seq("TYPE") and ( 764 self._parse_function() or self._parse_var() 765 ) 766 767 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 768 769 return self.expression( 770 exp.IndexColumnConstraint, 771 this=this, 772 expression=expression, 773 index_type=index_type, 774 granularity=granularity, 775 ) 776 777 def _parse_partition(self) -> t.Optional[exp.Partition]: 778 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 779 if not self._match(TokenType.PARTITION): 780 return None 781 782 if self._match_text_seq("ID"): 783 # Corresponds to the PARTITION ID <string_value> syntax 784 expressions: t.List[exp.Expression] = [ 785 self.expression(exp.PartitionId, this=self._parse_string()) 786 ] 787 else: 788 expressions = self._parse_expressions() 789 790 return self.expression(exp.Partition, expressions=expressions) 791 792 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 793 partition = self._parse_partition() 794 795 if not partition or not self._match(TokenType.FROM): 796 return None 797 798 return self.expression( 799 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 800 ) 801 802 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 803 if not self._match_text_seq("PROJECTION"): 804 return None 805 806 return self.expression( 807 exp.ProjectionDef, 808 this=self._parse_id_var(), 809 expression=self._parse_wrapped(self._parse_statement), 810 ) 811 812 def _parse_constraint(self) -> t.Optional[exp.Expression]: 813 return super()._parse_constraint() or self._parse_projection_def() 814 815 def _parse_alias( 816 self, this: t.Optional[exp.Expression], explicit: bool = False 817 ) -> t.Optional[exp.Expression]: 818 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 819 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 820 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 821 return this 822 823 return super()._parse_alias(this=this, explicit=explicit) 824 825 def _parse_expression(self) -> t.Optional[exp.Expression]: 826 this = super()._parse_expression() 827 828 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 829 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 830 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 831 self._match(TokenType.R_PAREN) 832 833 return this 834 835 def _parse_columns(self) -> exp.Expression: 836 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 837 838 while self._next and self._match_text_seq(")", "APPLY", "("): 839 self._match(TokenType.R_PAREN) 840 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 841 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
843 class Generator(generator.Generator): 844 QUERY_HINTS = False 845 STRUCT_DELIMITER = ("(", ")") 846 NVL2_SUPPORTED = False 847 TABLESAMPLE_REQUIRES_PARENS = False 848 TABLESAMPLE_SIZE_IS_ROWS = False 849 TABLESAMPLE_KEYWORDS = "SAMPLE" 850 LAST_DAY_SUPPORTS_DATE_PART = False 851 CAN_IMPLEMENT_ARRAY_ANY = True 852 SUPPORTS_TO_NUMBER = False 853 JOIN_HINTS = False 854 TABLE_HINTS = False 855 GROUPINGS_SEP = "" 856 SET_OP_MODIFIERS = False 857 SUPPORTS_TABLE_ALIAS_COLUMNS = False 858 VALUES_AS_TABLE = False 859 ARRAY_SIZE_NAME = "LENGTH" 860 861 STRING_TYPE_MAPPING = { 862 exp.DataType.Type.CHAR: "String", 863 exp.DataType.Type.LONGBLOB: "String", 864 exp.DataType.Type.LONGTEXT: "String", 865 exp.DataType.Type.MEDIUMBLOB: "String", 866 exp.DataType.Type.MEDIUMTEXT: "String", 867 exp.DataType.Type.TINYBLOB: "String", 868 exp.DataType.Type.TINYTEXT: "String", 869 exp.DataType.Type.TEXT: "String", 870 exp.DataType.Type.VARBINARY: "String", 871 exp.DataType.Type.VARCHAR: "String", 872 } 873 874 SUPPORTED_JSON_PATH_PARTS = { 875 exp.JSONPathKey, 876 exp.JSONPathRoot, 877 exp.JSONPathSubscript, 878 } 879 880 TYPE_MAPPING = { 881 **generator.Generator.TYPE_MAPPING, 882 **STRING_TYPE_MAPPING, 883 exp.DataType.Type.ARRAY: "Array", 884 exp.DataType.Type.BOOLEAN: "Bool", 885 exp.DataType.Type.BIGINT: "Int64", 886 exp.DataType.Type.DATE32: "Date32", 887 exp.DataType.Type.DATETIME: "DateTime", 888 exp.DataType.Type.DATETIME2: "DateTime", 889 exp.DataType.Type.SMALLDATETIME: "DateTime", 890 exp.DataType.Type.DATETIME64: "DateTime64", 891 exp.DataType.Type.DECIMAL: "Decimal", 892 exp.DataType.Type.DECIMAL32: "Decimal32", 893 exp.DataType.Type.DECIMAL64: "Decimal64", 894 exp.DataType.Type.DECIMAL128: "Decimal128", 895 exp.DataType.Type.DECIMAL256: "Decimal256", 896 exp.DataType.Type.TIMESTAMP: "DateTime", 897 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 898 exp.DataType.Type.DOUBLE: "Float64", 899 exp.DataType.Type.ENUM: "Enum", 900 exp.DataType.Type.ENUM8: "Enum8", 901 exp.DataType.Type.ENUM16: "Enum16", 902 exp.DataType.Type.FIXEDSTRING: "FixedString", 903 exp.DataType.Type.FLOAT: "Float32", 904 exp.DataType.Type.INT: "Int32", 905 exp.DataType.Type.MEDIUMINT: "Int32", 906 exp.DataType.Type.INT128: "Int128", 907 exp.DataType.Type.INT256: "Int256", 908 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 909 exp.DataType.Type.MAP: "Map", 910 exp.DataType.Type.NESTED: "Nested", 911 exp.DataType.Type.SMALLINT: "Int16", 912 exp.DataType.Type.STRUCT: "Tuple", 913 exp.DataType.Type.TINYINT: "Int8", 914 exp.DataType.Type.UBIGINT: "UInt64", 915 exp.DataType.Type.UINT: "UInt32", 916 exp.DataType.Type.UINT128: "UInt128", 917 exp.DataType.Type.UINT256: "UInt256", 918 exp.DataType.Type.USMALLINT: "UInt16", 919 exp.DataType.Type.UTINYINT: "UInt8", 920 exp.DataType.Type.IPV4: "IPv4", 921 exp.DataType.Type.IPV6: "IPv6", 922 exp.DataType.Type.POINT: "Point", 923 exp.DataType.Type.RING: "Ring", 924 exp.DataType.Type.LINESTRING: "LineString", 925 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 926 exp.DataType.Type.POLYGON: "Polygon", 927 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 928 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 929 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 930 } 931 932 TRANSFORMS = { 933 **generator.Generator.TRANSFORMS, 934 exp.AnyValue: rename_func("any"), 935 exp.ApproxDistinct: rename_func("uniq"), 936 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 937 exp.ArraySum: rename_func("arraySum"), 938 exp.ArgMax: arg_max_or_min_no_count("argMax"), 939 exp.ArgMin: arg_max_or_min_no_count("argMin"), 940 exp.Array: inline_array_sql, 941 exp.CastToStrType: rename_func("CAST"), 942 exp.CountIf: rename_func("countIf"), 943 exp.CompressColumnConstraint: lambda self, 944 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 945 exp.ComputedColumnConstraint: lambda self, 946 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 947 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 948 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 949 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 950 exp.DateStrToDate: rename_func("toDate"), 951 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 952 exp.Explode: rename_func("arrayJoin"), 953 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 954 exp.IsNan: rename_func("isNaN"), 955 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 956 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 957 exp.JSONPathKey: json_path_key_only_name, 958 exp.JSONPathRoot: lambda *_: "", 959 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 960 exp.Median: rename_func("median"), 961 exp.Nullif: rename_func("nullIf"), 962 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 963 exp.Pivot: no_pivot_sql, 964 exp.Quantile: _quantile_sql, 965 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 966 exp.Rand: rename_func("randCanonical"), 967 exp.StartsWith: rename_func("startsWith"), 968 exp.StrPosition: lambda self, e: self.func( 969 "position", e.this, e.args.get("substr"), e.args.get("position") 970 ), 971 exp.TimeToStr: lambda self, e: self.func( 972 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 973 ), 974 exp.TimeStrToTime: _timestrtotime_sql, 975 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 976 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 977 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 978 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 979 exp.MD5Digest: rename_func("MD5"), 980 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 981 exp.SHA: rename_func("SHA1"), 982 exp.SHA2: sha256_sql, 983 exp.UnixToTime: _unix_to_time_sql, 984 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 985 exp.Trim: trim_sql, 986 exp.Variance: rename_func("varSamp"), 987 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 988 exp.Stddev: rename_func("stddevSamp"), 989 exp.Chr: rename_func("CHAR"), 990 exp.Lag: lambda self, e: self.func( 991 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 992 ), 993 exp.Lead: lambda self, e: self.func( 994 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 995 ), 996 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 997 rename_func("editDistance") 998 ), 999 } 1000 1001 PROPERTIES_LOCATION = { 1002 **generator.Generator.PROPERTIES_LOCATION, 1003 exp.OnCluster: exp.Properties.Location.POST_NAME, 1004 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1005 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1006 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1007 } 1008 1009 # There's no list in docs, but it can be found in Clickhouse code 1010 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1011 ON_CLUSTER_TARGETS = { 1012 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1013 "DATABASE", 1014 "TABLE", 1015 "VIEW", 1016 "DICTIONARY", 1017 "INDEX", 1018 "FUNCTION", 1019 "NAMED COLLECTION", 1020 } 1021 1022 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1023 NON_NULLABLE_TYPES = { 1024 exp.DataType.Type.ARRAY, 1025 exp.DataType.Type.MAP, 1026 exp.DataType.Type.STRUCT, 1027 exp.DataType.Type.POINT, 1028 exp.DataType.Type.RING, 1029 exp.DataType.Type.LINESTRING, 1030 exp.DataType.Type.MULTILINESTRING, 1031 exp.DataType.Type.POLYGON, 1032 exp.DataType.Type.MULTIPOLYGON, 1033 } 1034 1035 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1036 strtodate_sql = self.function_fallback_sql(expression) 1037 1038 if not isinstance(expression.parent, exp.Cast): 1039 # StrToDate returns DATEs in other dialects (eg. postgres), so 1040 # this branch aims to improve the transpilation to clickhouse 1041 return f"CAST({strtodate_sql} AS DATE)" 1042 1043 return strtodate_sql 1044 1045 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1046 this = expression.this 1047 1048 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1049 return self.sql(this) 1050 1051 return super().cast_sql(expression, safe_prefix=safe_prefix) 1052 1053 def trycast_sql(self, expression: exp.TryCast) -> str: 1054 dtype = expression.to 1055 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1056 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1057 dtype.set("nullable", True) 1058 1059 return super().cast_sql(expression) 1060 1061 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1062 this = self.json_path_part(expression.this) 1063 return str(int(this) + 1) if is_int(this) else this 1064 1065 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1066 return f"AS {self.sql(expression, 'this')}" 1067 1068 def _any_to_has( 1069 self, 1070 expression: exp.EQ | exp.NEQ, 1071 default: t.Callable[[t.Any], str], 1072 prefix: str = "", 1073 ) -> str: 1074 if isinstance(expression.left, exp.Any): 1075 arr = expression.left 1076 this = expression.right 1077 elif isinstance(expression.right, exp.Any): 1078 arr = expression.right 1079 this = expression.left 1080 else: 1081 return default(expression) 1082 1083 return prefix + self.func("has", arr.this.unnest(), this) 1084 1085 def eq_sql(self, expression: exp.EQ) -> str: 1086 return self._any_to_has(expression, super().eq_sql) 1087 1088 def neq_sql(self, expression: exp.NEQ) -> str: 1089 return self._any_to_has(expression, super().neq_sql, "NOT ") 1090 1091 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1092 # Manually add a flag to make the search case-insensitive 1093 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1094 return self.func("match", expression.this, regex) 1095 1096 def datatype_sql(self, expression: exp.DataType) -> str: 1097 # String is the standard ClickHouse type, every other variant is just an alias. 1098 # Additionally, any supplied length parameter will be ignored. 1099 # 1100 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1101 if expression.this in self.STRING_TYPE_MAPPING: 1102 dtype = "String" 1103 else: 1104 dtype = super().datatype_sql(expression) 1105 1106 # This section changes the type to `Nullable(...)` if the following conditions hold: 1107 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1108 # and change their semantics 1109 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1110 # constraint: "Type of Map key must be a type, that can be represented by integer or 1111 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1112 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1113 parent = expression.parent 1114 nullable = expression.args.get("nullable") 1115 if nullable is True or ( 1116 nullable is None 1117 and not ( 1118 isinstance(parent, exp.DataType) 1119 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1120 and expression.index in (None, 0) 1121 ) 1122 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1123 ): 1124 dtype = f"Nullable({dtype})" 1125 1126 return dtype 1127 1128 def cte_sql(self, expression: exp.CTE) -> str: 1129 if expression.args.get("scalar"): 1130 this = self.sql(expression, "this") 1131 alias = self.sql(expression, "alias") 1132 return f"{this} AS {alias}" 1133 1134 return super().cte_sql(expression) 1135 1136 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1137 return super().after_limit_modifiers(expression) + [ 1138 ( 1139 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1140 if expression.args.get("settings") 1141 else "" 1142 ), 1143 ( 1144 self.seg("FORMAT ") + self.sql(expression, "format") 1145 if expression.args.get("format") 1146 else "" 1147 ), 1148 ] 1149 1150 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1151 params = self.expressions(expression, key="params", flat=True) 1152 return self.func(expression.name, *expression.expressions) + f"({params})" 1153 1154 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1155 return self.func(expression.name, *expression.expressions) 1156 1157 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1158 return self.anonymousaggfunc_sql(expression) 1159 1160 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1161 return self.parameterizedagg_sql(expression) 1162 1163 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1164 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1165 1166 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1167 return f"ON CLUSTER {self.sql(expression, 'this')}" 1168 1169 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1170 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1171 exp.Properties.Location.POST_NAME 1172 ): 1173 this_name = self.sql( 1174 expression.this if isinstance(expression.this, exp.Schema) else expression, 1175 "this", 1176 ) 1177 this_properties = " ".join( 1178 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1179 ) 1180 this_schema = self.schema_columns_sql(expression.this) 1181 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1182 1183 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1184 1185 return super().createable_sql(expression, locations) 1186 1187 def create_sql(self, expression: exp.Create) -> str: 1188 # The comment property comes last in CTAS statements, i.e. after the query 1189 query = expression.expression 1190 if isinstance(query, exp.Query): 1191 comment_prop = expression.find(exp.SchemaCommentProperty) 1192 if comment_prop: 1193 comment_prop.pop() 1194 query.replace(exp.paren(query)) 1195 else: 1196 comment_prop = None 1197 1198 create_sql = super().create_sql(expression) 1199 1200 comment_sql = self.sql(comment_prop) 1201 comment_sql = f" {comment_sql}" if comment_sql else "" 1202 1203 return f"{create_sql}{comment_sql}" 1204 1205 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1206 this = self.indent(self.sql(expression, "this")) 1207 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1208 1209 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1210 this = self.sql(expression, "this") 1211 this = f" {this}" if this else "" 1212 expr = self.sql(expression, "expression") 1213 expr = f" {expr}" if expr else "" 1214 index_type = self.sql(expression, "index_type") 1215 index_type = f" TYPE {index_type}" if index_type else "" 1216 granularity = self.sql(expression, "granularity") 1217 granularity = f" GRANULARITY {granularity}" if granularity else "" 1218 1219 return f"INDEX{this}{expr}{index_type}{granularity}" 1220 1221 def partition_sql(self, expression: exp.Partition) -> str: 1222 return f"PARTITION {self.expressions(expression, flat=True)}" 1223 1224 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1225 return f"ID {self.sql(expression.this)}" 1226 1227 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1228 return ( 1229 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1230 ) 1231 1232 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1233 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1234 1235 def is_sql(self, expression: exp.Is) -> str: 1236 is_sql = super().is_sql(expression) 1237 1238 if isinstance(expression.parent, exp.Not): 1239 # value IS NOT NULL -> NOT (value IS NULL) 1240 is_sql = self.wrap(is_sql) 1241 1242 return is_sql
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
1035 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1036 strtodate_sql = self.function_fallback_sql(expression) 1037 1038 if not isinstance(expression.parent, exp.Cast): 1039 # StrToDate returns DATEs in other dialects (eg. postgres), so 1040 # this branch aims to improve the transpilation to clickhouse 1041 return f"CAST({strtodate_sql} AS DATE)" 1042 1043 return strtodate_sql
1045 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1046 this = expression.this 1047 1048 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1049 return self.sql(this) 1050 1051 return super().cast_sql(expression, safe_prefix=safe_prefix)
1053 def trycast_sql(self, expression: exp.TryCast) -> str: 1054 dtype = expression.to 1055 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1056 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1057 dtype.set("nullable", True) 1058 1059 return super().cast_sql(expression)
1096 def datatype_sql(self, expression: exp.DataType) -> str: 1097 # String is the standard ClickHouse type, every other variant is just an alias. 1098 # Additionally, any supplied length parameter will be ignored. 1099 # 1100 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1101 if expression.this in self.STRING_TYPE_MAPPING: 1102 dtype = "String" 1103 else: 1104 dtype = super().datatype_sql(expression) 1105 1106 # This section changes the type to `Nullable(...)` if the following conditions hold: 1107 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1108 # and change their semantics 1109 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1110 # constraint: "Type of Map key must be a type, that can be represented by integer or 1111 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1112 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1113 parent = expression.parent 1114 nullable = expression.args.get("nullable") 1115 if nullable is True or ( 1116 nullable is None 1117 and not ( 1118 isinstance(parent, exp.DataType) 1119 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1120 and expression.index in (None, 0) 1121 ) 1122 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1123 ): 1124 dtype = f"Nullable({dtype})" 1125 1126 return dtype
1136 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1137 return super().after_limit_modifiers(expression) + [ 1138 ( 1139 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1140 if expression.args.get("settings") 1141 else "" 1142 ), 1143 ( 1144 self.seg("FORMAT ") + self.sql(expression, "format") 1145 if expression.args.get("format") 1146 else "" 1147 ), 1148 ]
1169 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1170 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1171 exp.Properties.Location.POST_NAME 1172 ): 1173 this_name = self.sql( 1174 expression.this if isinstance(expression.this, exp.Schema) else expression, 1175 "this", 1176 ) 1177 this_properties = " ".join( 1178 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1179 ) 1180 this_schema = self.schema_columns_sql(expression.this) 1181 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1182 1183 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1184 1185 return super().createable_sql(expression, locations)
1187 def create_sql(self, expression: exp.Create) -> str: 1188 # The comment property comes last in CTAS statements, i.e. after the query 1189 query = expression.expression 1190 if isinstance(query, exp.Query): 1191 comment_prop = expression.find(exp.SchemaCommentProperty) 1192 if comment_prop: 1193 comment_prop.pop() 1194 query.replace(exp.paren(query)) 1195 else: 1196 comment_prop = None 1197 1198 create_sql = super().create_sql(expression) 1199 1200 comment_sql = self.sql(comment_prop) 1201 comment_sql = f" {comment_sql}" if comment_sql else "" 1202 1203 return f"{create_sql}{comment_sql}"
1209 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1210 this = self.sql(expression, "this") 1211 this = f" {this}" if this else "" 1212 expr = self.sql(expression, "expression") 1213 expr = f" {expr}" if expr else "" 1214 index_type = self.sql(expression, "index_type") 1215 index_type = f" TYPE {index_type}" if index_type else "" 1216 granularity = self.sql(expression, "granularity") 1217 granularity = f" GRANULARITY {granularity}" if granularity else "" 1218 1219 return f"INDEX{this}{expr}{index_type}{granularity}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql