sqlglot.dialects.clickhouse
from __future__ import annotations
import typing as t
import datetime
from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    length_or_char_length_sql,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    strposition_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType
from sqlglot.generator import unsupported_args

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args)


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
            expression.args.get("zone"),
        )

    return _delta_sql

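# A minimal illustration of the helpers above (doctest-style; outputs reasoned
# from the code rather than executed): `_lower_func` lowercases only the text
# before the first parenthesis, i.e. the function name.
#
#   >>> _lower_func("SUM(x)")
#   'sum(x)'
#
# Similarly, `_build_count_if` keeps a single-argument call as exp.CountIf and
# returns an exp.CombinedAggFunc named countIf over all arguments otherwise.
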
def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return literal with no timezone, e.g. turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14'
        # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if
        # it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))


def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> str:
    if not (expression.parent and expression.parent.arg_key == "settings"):
        return _lower_func(var_map_sql(self, expression))

    keys = expression.args.get("keys")
    values = expression.args.get("values")

    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
        self.unsupported("Cannot convert array columns into map.")
        return ""

    args = []
    for key, value in zip(keys.expressions, values.expressions):
        args.append(f"{self.sql(key)}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"{{{csv_args}}}"

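# A short worked example of the fractional-second padding performed above
# (stdlib only; it mirrors the code path in `_timestrtotime_sql`):
#
#   >>> import datetime
#   >>> date_time, rest = "2020-01-01 12:13:14.12-08:00".split(".")
#   >>> frac, offset = rest.split("-")
#   >>> padded = f"{date_time}.{frac.ljust(6, '0')}-{offset}"
#   >>> datetime.datetime.fromisoformat(padded).replace(tzinfo=None).isoformat(sep=" ")
#   '2020-01-01 12:13:14.120000'
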
class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    HEX_STRING_IS_INTEGER_TYPE = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]:
        # Clickhouse allows VALUES to have an embedded structure, e.g.:
        # VALUES('person String, place String', ('Noah', 'Paris'), ...)
        # In this case, we don't want to qualify the columns
        values = expression.expressions[0].expressions

        structure = (
            values[0]
            if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple))
            else None
        )
        if structure:
            # Split each column definition into the column name, e.g.:
            # 'person String, place String' -> ['person', 'place']
            structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")]
            column_aliases = [
                exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs
            ]
        else:
            # Default column aliases in CH are "c1", "c2", etc.
            column_aliases = [
                exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions))
            ]

        return column_aliases

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        IDENTIFIER_ESCAPES = ["\\"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            ".:": TokenType.DOTCOLON,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "DYNAMIC": TokenType.DYNAMIC,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "EXCHANGE": TokenType.COMMAND,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "NOTHING": TokenType.NOTHING,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT16": TokenType.USMALLINT,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False
        OPTIONAL_ALIAS_TOKEN_CTE = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
            "DATE_FORMAT": _build_date_format,
"DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 311 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 312 "FORMATDATETIME": _build_date_format, 313 "JSONEXTRACTSTRING": build_json_extract_path( 314 exp.JSONExtractScalar, zero_based_indexing=False 315 ), 316 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 317 "MAP": parser.build_var_map, 318 "MATCH": exp.RegexpLike.from_arg_list, 319 "RANDCANONICAL": exp.Rand.from_arg_list, 320 "STR_TO_DATE": _build_str_to_date, 321 "TUPLE": exp.Struct.from_arg_list, 322 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 323 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 324 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 325 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 326 "UNIQ": exp.ApproxDistinct.from_arg_list, 327 "XOR": lambda args: exp.Xor(expressions=args), 328 "MD5": exp.MD5Digest.from_arg_list, 329 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 330 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 331 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 332 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 333 } 334 FUNCTIONS.pop("TRANSFORM") 335 336 AGG_FUNCTIONS = { 337 "count", 338 "min", 339 "max", 340 "sum", 341 "avg", 342 "any", 343 "stddevPop", 344 "stddevSamp", 345 "varPop", 346 "varSamp", 347 "corr", 348 "covarPop", 349 "covarSamp", 350 "entropy", 351 "exponentialMovingAverage", 352 "intervalLengthSum", 353 "kolmogorovSmirnovTest", 354 "mannWhitneyUTest", 355 "median", 356 "rankCorr", 357 "sumKahan", 358 "studentTTest", 359 "welchTTest", 360 "anyHeavy", 361 "anyLast", 362 "boundingRatio", 363 "first_value", 364 "last_value", 365 "argMin", 366 "argMax", 367 "avgWeighted", 368 "topK", 369 "topKWeighted", 370 "deltaSum", 371 "deltaSumTimestamp", 372 "groupArray", 373 "groupArrayLast", 374 "groupUniqArray", 375 "groupArrayInsertAt", 376 "groupArrayMovingAvg", 377 "groupArrayMovingSum", 378 "groupArraySample", 379 "groupBitAnd", 380 "groupBitOr", 381 "groupBitXor", 382 "groupBitmap", 383 "groupBitmapAnd", 384 "groupBitmapOr", 385 "groupBitmapXor", 386 "sumWithOverflow", 387 "sumMap", 388 "minMap", 389 "maxMap", 390 "skewSamp", 391 "skewPop", 392 "kurtSamp", 393 "kurtPop", 394 "uniq", 395 "uniqExact", 396 "uniqCombined", 397 "uniqCombined64", 398 "uniqHLL12", 399 "uniqTheta", 400 "quantile", 401 "quantiles", 402 "quantileExact", 403 "quantilesExact", 404 "quantileExactLow", 405 "quantilesExactLow", 406 "quantileExactHigh", 407 "quantilesExactHigh", 408 "quantileExactWeighted", 409 "quantilesExactWeighted", 410 "quantileTiming", 411 "quantilesTiming", 412 "quantileTimingWeighted", 413 "quantilesTimingWeighted", 414 "quantileDeterministic", 415 "quantilesDeterministic", 416 "quantileTDigest", 417 "quantilesTDigest", 418 "quantileTDigestWeighted", 419 "quantilesTDigestWeighted", 420 "quantileBFloat16", 421 "quantilesBFloat16", 422 "quantileBFloat16Weighted", 423 "quantilesBFloat16Weighted", 424 "simpleLinearRegression", 425 "stochasticLinearRegression", 426 "stochasticLogisticRegression", 427 "categoricalInformationValue", 428 "contingency", 429 "cramersV", 430 "cramersVBiasCorrected", 431 "theilsU", 432 "maxIntersections", 433 "maxIntersectionsPosition", 434 "meanZTest", 435 "quantileInterpolatedWeighted", 436 "quantilesInterpolatedWeighted", 437 "quantileGK", 438 "quantilesGK", 439 "sparkBar", 440 "sumCount", 441 
"largestTriangleThreeBuckets", 442 "histogram", 443 "sequenceMatch", 444 "sequenceCount", 445 "windowFunnel", 446 "retention", 447 "uniqUpTo", 448 "sequenceNextNode", 449 "exponentialTimeDecayedAvg", 450 } 451 452 AGG_FUNCTIONS_SUFFIXES = [ 453 "If", 454 "Array", 455 "ArrayIf", 456 "Map", 457 "SimpleState", 458 "State", 459 "Merge", 460 "MergeState", 461 "ForEach", 462 "Distinct", 463 "OrDefault", 464 "OrNull", 465 "Resample", 466 "ArgMin", 467 "ArgMax", 468 ] 469 470 FUNC_TOKENS = { 471 *parser.Parser.FUNC_TOKENS, 472 TokenType.AND, 473 TokenType.OR, 474 TokenType.SET, 475 } 476 477 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 478 479 ID_VAR_TOKENS = { 480 *parser.Parser.ID_VAR_TOKENS, 481 TokenType.LIKE, 482 } 483 484 AGG_FUNC_MAPPING = ( 485 lambda functions, suffixes: { 486 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 487 } 488 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 489 490 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 491 492 FUNCTION_PARSERS = { 493 **parser.Parser.FUNCTION_PARSERS, 494 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 495 "QUANTILE": lambda self: self._parse_quantile(), 496 "MEDIAN": lambda self: self._parse_quantile(), 497 "COLUMNS": lambda self: self._parse_columns(), 498 } 499 500 FUNCTION_PARSERS.pop("MATCH") 501 502 PROPERTY_PARSERS = { 503 **parser.Parser.PROPERTY_PARSERS, 504 "ENGINE": lambda self: self._parse_engine_property(), 505 } 506 PROPERTY_PARSERS.pop("DYNAMIC") 507 508 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 509 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 510 511 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 512 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 513 514 RANGE_PARSERS = { 515 **parser.Parser.RANGE_PARSERS, 516 TokenType.GLOBAL: lambda self, this: self._parse_global_in(this), 517 } 518 519 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 520 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
            "MEDIAN": lambda self: self._parse_quantile(),
            "COLUMNS": lambda self: self._parse_columns(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "ENGINE": lambda self: self._parse_engine_property(),
        }
        PROPERTY_PARSERS.pop("DYNAMIC")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._parse_global_in(this),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        def _parse_engine_property(self) -> exp.EngineProperty:
            self._match(TokenType.EQ)
            return self.expression(
                exp.EngineProperty,
                this=self._parse_field(any_token=True, anonymous_func=True),
            )

        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            return self._parse_lambda()

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype
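
        # Hedged sketch of the effect of the nullable marking above: a type parsed
        # from another dialect carries nullable=None and is wrapped by the
        # generator, while a type parsed as ClickHouse is pinned to nullable=False
        # and left alone (see Generator.datatype_sql below). Indicatively:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("CAST(x AS TEXT)", read="postgres", write="clickhouse")[0]
        #   'CAST(x AS Nullable(String))'
        #   >>> sqlglot.transpile("CAST(x AS String)", read="clickhouse", write="clickhouse")[0]
        #   'CAST(x AS String)'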

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            index = self._index

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self._retreat(index)
                return None
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            if isinstance(this, exp.Identifier) and not this.quoted:
                this = exp.var(this.name)

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            l_brace = self._match(TokenType.L_BRACE, advance=False)
            bracket = super()._parse_bracket(this)

            if l_brace and isinstance(bracket, exp.Struct):
                varmap = exp.VarMap(keys=exp.Array(), values=exp.Array())
                for expression in bracket.expressions:
                    if not isinstance(expression, exp.PropertyEQ):
                        break

                    varmap.args["keys"].append("expressions", exp.Literal.string(expression.name))
                    varmap.args["values"].append("expressions", expression.expression)

                return varmap

            return bracket

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In:
            is_negated = self._match(TokenType.NOT)
            this = self._match(TokenType.IN) and self._parse_in(this, is_global=True)
            return self.expression(exp.Not, this=this) if is_negated else this
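
        # Hedged illustration of the GLOBAL IN plumbing above: GLOBAL triggers the
        # range parser, _parse_global_in consumes an optional NOT, and the result
        # is an exp.In with is_global=True (wrapped in exp.Not when negated). The
        # Generator's in_sql/not_sql below reassemble the keyword order, so a round
        # trip should hold (output indicative, not a verified run):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT a GLOBAL NOT IN (1, 2) FROM t", read="clickhouse", write="clickhouse")[0]
        #   'SELECT a GLOBAL NOT IN (1, 2) FROM t'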

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if isinstance(this, exp.Table):
                inner = this.this
                alias = this.args.get("alias")

                if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns:
                    alias.set("columns", [exp.to_identifier("generate_series")])

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> t.Optional[exp.CTE]:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))
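
        # Hedged sketch of the parameterized-call handling above: in
        # `quantile(0.5)(x)`, _parse_quantile reads the level and _parse_func_params
        # reads the second argument list, yielding exp.Quantile(this=x, quantile=0.5);
        # `median(x)` falls back to a 0.5 level. _quantile_sql (top of this module)
        # re-emits the two call sites, so indicatively:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT quantile(0.5)(x) FROM t", read="clickhouse").selects[0].sql("clickhouse")
        #   'quantile(0.5)(x)'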

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_string() or self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

        def _parse_alias(
            self, this: t.Optional[exp.Expression], explicit: bool = False
        ) -> t.Optional[exp.Expression]:
            # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
            # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
            if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
                return this

            return super()._parse_alias(this=this, explicit=explicit)
However, "SELECT <expr> apply" is a valid alias 924 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 925 return this 926 927 return super()._parse_alias(this=this, explicit=explicit) 928 929 def _parse_expression(self) -> t.Optional[exp.Expression]: 930 this = super()._parse_expression() 931 932 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 933 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 934 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 935 self._match(TokenType.R_PAREN) 936 937 return this 938 939 def _parse_columns(self) -> exp.Expression: 940 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 941 942 while self._next and self._match_text_seq(")", "APPLY", "("): 943 self._match(TokenType.R_PAREN) 944 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 945 return this 946 947 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 948 value = super()._parse_value(values=values) 949 if not value: 950 return None 951 952 # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast 953 # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one. 954 # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics), 955 # but the final result is not altered by the extra parentheses. 956 # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression 957 expressions = value.expressions 958 if values and not isinstance(expressions[-1], exp.Tuple): 959 value.set( 960 "expressions", 961 [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions], 962 ) 963 964 return value 965 966 class Generator(generator.Generator): 967 QUERY_HINTS = False 968 STRUCT_DELIMITER = ("(", ")") 969 NVL2_SUPPORTED = False 970 TABLESAMPLE_REQUIRES_PARENS = False 971 TABLESAMPLE_SIZE_IS_ROWS = False 972 TABLESAMPLE_KEYWORDS = "SAMPLE" 973 LAST_DAY_SUPPORTS_DATE_PART = False 974 CAN_IMPLEMENT_ARRAY_ANY = True 975 SUPPORTS_TO_NUMBER = False 976 JOIN_HINTS = False 977 TABLE_HINTS = False 978 GROUPINGS_SEP = "" 979 SET_OP_MODIFIERS = False 980 ARRAY_SIZE_NAME = "LENGTH" 981 WRAP_DERIVED_VALUES = False 982 983 STRING_TYPE_MAPPING = { 984 exp.DataType.Type.BLOB: "String", 985 exp.DataType.Type.CHAR: "String", 986 exp.DataType.Type.LONGBLOB: "String", 987 exp.DataType.Type.LONGTEXT: "String", 988 exp.DataType.Type.MEDIUMBLOB: "String", 989 exp.DataType.Type.MEDIUMTEXT: "String", 990 exp.DataType.Type.TINYBLOB: "String", 991 exp.DataType.Type.TINYTEXT: "String", 992 exp.DataType.Type.TEXT: "String", 993 exp.DataType.Type.VARBINARY: "String", 994 exp.DataType.Type.VARCHAR: "String", 995 } 996 997 SUPPORTED_JSON_PATH_PARTS = { 998 exp.JSONPathKey, 999 exp.JSONPathRoot, 1000 exp.JSONPathSubscript, 1001 } 1002 1003 TYPE_MAPPING = { 1004 **generator.Generator.TYPE_MAPPING, 1005 **STRING_TYPE_MAPPING, 1006 exp.DataType.Type.ARRAY: "Array", 1007 exp.DataType.Type.BOOLEAN: "Bool", 1008 exp.DataType.Type.BIGINT: "Int64", 1009 exp.DataType.Type.DATE32: "Date32", 1010 exp.DataType.Type.DATETIME: "DateTime", 1011 exp.DataType.Type.DATETIME2: "DateTime", 1012 exp.DataType.Type.SMALLDATETIME: "DateTime", 1013 exp.DataType.Type.DATETIME64: "DateTime64", 1014 exp.DataType.Type.DECIMAL: "Decimal", 1015 exp.DataType.Type.DECIMAL32: "Decimal32", 1016 exp.DataType.Type.DECIMAL64: "Decimal64", 1017 

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        ARRAY_SIZE_NAME = "LENGTH"
        WRAP_DERIVED_VALUES = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.BLOB: "String",
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BOOLEAN: "Bool",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME2: "DateTime",
            exp.DataType.Type.SMALLDATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DECIMAL: "Decimal",
            exp.DataType.Type.DECIMAL32: "Decimal32",
            exp.DataType.Type.DECIMAL64: "Decimal64",
            exp.DataType.Type.DECIMAL128: "Decimal128",
            exp.DataType.Type.DECIMAL256: "Decimal256",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPNTZ: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NOTHING: "Nothing",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.POINT: "Point",
            exp.DataType.Type.RING: "Ring",
            exp.DataType.Type.LINESTRING: "LineString",
            exp.DataType.Type.MULTILINESTRING: "MultiLineString",
            exp.DataType.Type.POLYGON: "Polygon",
            exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
            exp.DataType.Type.DYNAMIC: "Dynamic",
        }
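
        # Hedged sketch of how TYPE_MAPPING combines with the Nullable wrapping in
        # datatype_sql below when transpiling from a dialect that lacks ClickHouse's
        # non-nullable default (output indicative, not a verified run):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("CAST(a AS BIGINT)", read="duckdb", write="clickhouse")[0]
        #   'CAST(a AS Nullable(Int64))'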

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayConcat: rename_func("arrayConcat"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Length: length_or_char_length_sql,
            exp.Map: _map_sql,
            exp.Median: rename_func("median"),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self,
                e,
                func_name="POSITION",
                supports_position=True,
                use_ansi_position=False,
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
            ),
            exp.TimeStrToTime: _timestrtotime_sql,
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: _map_sql,
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
            exp.Chr: rename_func("CHAR"),
            exp.Lag: lambda self, e: self.func(
                "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Lead: lambda self, e: self.func(
                "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("editDistance")
            ),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.ToTableProperty: exp.Properties.Location.POST_NAME,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # There's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (e.g. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return self.cast_sql(exp.cast(expression, "DATE"))

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype
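
        # Hedged illustration of _any_to_has via eq_sql/neq_sql above: comparisons
        # against ANY(array) are rewritten to ClickHouse's has() (output indicative,
        # not a verified run):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT x = ANY(ARRAY[1, 2])", read="postgres", write="clickhouse")[0]
        #   'SELECT has([1, 2], x)'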

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{this_properties}{this_schema}"

            return super().createable_sql(expression, locations)

        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

        def is_sql(self, expression: exp.Is) -> str:
            is_sql = super().is_sql(expression)

            if isinstance(expression.parent, exp.Not):
                # value IS NOT NULL -> NOT (value IS NULL)
                is_sql = self.wrap(is_sql)

            return is_sql

        def in_sql(self, expression: exp.In) -> str:
            in_sql = super().in_sql(expression)

            if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"):
                in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

            return in_sql

        def not_sql(self, expression: exp.Not) -> str:
            if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"):
                # let `GLOBAL IN` child interpose `NOT`
                return self.sql(expression, "this")

            return super().not_sql(expression)

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            # If the VALUES clause contains tuples of expressions, we need to treat it
            # as a table since Clickhouse will automatically alias it as such.
            alias = expression.args.get("alias")

            if alias and alias.args.get("columns") and expression.expressions:
                values = expression.expressions[0].expressions
                values_as_table = any(isinstance(value, exp.Tuple) for value in values)
            else:
                values_as_table = True

            return super().values_sql(expression, values_as_table=values_as_table)
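
        # A final hedged round-trip sketch tying Parser.QUERY_MODIFIER_PARSERS to
        # after_limit_modifiers above; SETTINGS and FORMAT survive parsing and
        # generation (output indicative, not a verified run):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT * FROM t SETTINGS max_threads = 4 FORMAT JSONEachRow", read="clickhouse", write="clickhouse")[0]
        #   'SELECT * FROM t SETTINGS max_threads = 4 FORMAT JSONEachRow'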
t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 722 723 if not cte: 724 # WITH <expression> AS <identifier> 725 cte = self.expression( 726 exp.CTE, 727 this=self._parse_assignment(), 728 alias=self._parse_table_alias(), 729 scalar=True, 730 ) 731 732 return cte 733 734 def _parse_join_parts( 735 self, 736 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 737 is_global = self._match(TokenType.GLOBAL) and self._prev 738 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 739 740 if kind_pre: 741 kind = self._match_set(self.JOIN_KINDS) and self._prev 742 side = self._match_set(self.JOIN_SIDES) and self._prev 743 return is_global, side, kind 744 745 return ( 746 is_global, 747 self._match_set(self.JOIN_SIDES) and self._prev, 748 self._match_set(self.JOIN_KINDS) and self._prev, 749 ) 750 751 def _parse_join( 752 self, skip_join_token: bool = False, parse_bracket: bool = False 753 ) -> t.Optional[exp.Join]: 754 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 755 if join: 756 join.set("global", join.args.pop("method", None)) 757 758 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 759 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 760 if join.kind == "ARRAY": 761 for table in join.find_all(exp.Table): 762 table.replace(table.to_column()) 763 764 return join 765 766 def _parse_function( 767 self, 768 functions: t.Optional[t.Dict[str, t.Callable]] = None, 769 anonymous: bool = False, 770 optional_parens: bool = True, 771 any_token: bool = False, 772 ) -> t.Optional[exp.Expression]: 773 expr = super()._parse_function( 774 functions=functions, 775 anonymous=anonymous, 776 optional_parens=optional_parens, 777 any_token=any_token, 778 ) 779 780 func = expr.this if isinstance(expr, exp.Window) else expr 781 782 # Aggregate functions can be split in 2 parts: <func_name><suffix> 783 parts = ( 784 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 785 ) 786 787 if parts: 788 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 789 params = self._parse_func_params(anon_func) 790 791 kwargs = { 792 "this": anon_func.this, 793 "expressions": anon_func.expressions, 794 } 795 if parts[1]: 796 exp_class: t.Type[exp.Expression] = ( 797 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 798 ) 799 else: 800 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 801 802 kwargs["exp_class"] = exp_class 803 if params: 804 kwargs["params"] = params 805 806 func = self.expression(**kwargs) 807 808 if isinstance(expr, exp.Window): 809 # The window's func was parsed as Anonymous in base parser, fix its 810 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 811 expr.set("this", func) 812 elif params: 813 # Params have blocked super()._parse_function() from parsing the following window 814 # (if that exists) as they're standing between the function call and the window spec 815 expr = self._parse_window(func) 816 else: 817 expr = func 818 819 return expr 820 821 def _parse_func_params( 822 self, this: t.Optional[exp.Func] = None 823 ) -> t.Optional[t.List[exp.Expression]]: 824 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 825 return self._parse_csv(self._parse_lambda) 826 827 if self._match(TokenType.L_PAREN): 828 params = self._parse_csv(self._parse_lambda) 829 self._match_r_paren(this) 830 return params 831 832 return None 833 834 def _parse_quantile(self) -> exp.Quantile: 835 this = 
self._parse_lambda() 836 params = self._parse_func_params() 837 if params: 838 return self.expression(exp.Quantile, this=params[0], quantile=this) 839 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 840 841 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 842 return super()._parse_wrapped_id_vars(optional=True) 843 844 def _parse_primary_key( 845 self, wrapped_optional: bool = False, in_props: bool = False 846 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 847 return super()._parse_primary_key( 848 wrapped_optional=wrapped_optional or in_props, in_props=in_props 849 ) 850 851 def _parse_on_property(self) -> t.Optional[exp.Expression]: 852 index = self._index 853 if self._match_text_seq("CLUSTER"): 854 this = self._parse_string() or self._parse_id_var() 855 if this: 856 return self.expression(exp.OnCluster, this=this) 857 else: 858 self._retreat(index) 859 return None 860 861 def _parse_index_constraint( 862 self, kind: t.Optional[str] = None 863 ) -> exp.IndexColumnConstraint: 864 # INDEX name1 expr TYPE type1(args) GRANULARITY value 865 this = self._parse_id_var() 866 expression = self._parse_assignment() 867 868 index_type = self._match_text_seq("TYPE") and ( 869 self._parse_function() or self._parse_var() 870 ) 871 872 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 873 874 return self.expression( 875 exp.IndexColumnConstraint, 876 this=this, 877 expression=expression, 878 index_type=index_type, 879 granularity=granularity, 880 ) 881 882 def _parse_partition(self) -> t.Optional[exp.Partition]: 883 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 884 if not self._match(TokenType.PARTITION): 885 return None 886 887 if self._match_text_seq("ID"): 888 # Corresponds to the PARTITION ID <string_value> syntax 889 expressions: t.List[exp.Expression] = [ 890 self.expression(exp.PartitionId, this=self._parse_string()) 891 ] 892 else: 893 expressions = self._parse_expressions() 894 895 return self.expression(exp.Partition, expressions=expressions) 896 897 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 898 partition = self._parse_partition() 899 900 if not partition or not self._match(TokenType.FROM): 901 return None 902 903 return self.expression( 904 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 905 ) 906 907 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 908 if not self._match_text_seq("PROJECTION"): 909 return None 910 911 return self.expression( 912 exp.ProjectionDef, 913 this=self._parse_id_var(), 914 expression=self._parse_wrapped(self._parse_statement), 915 ) 916 917 def _parse_constraint(self) -> t.Optional[exp.Expression]: 918 return super()._parse_constraint() or self._parse_projection_def() 919 920 def _parse_alias( 921 self, this: t.Optional[exp.Expression], explicit: bool = False 922 ) -> t.Optional[exp.Expression]: 923 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 924 # so "APPLY" shouldn't be parsed as <expr>'s alias. 
However, "SELECT <expr> apply" is a valid alias 925 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 926 return this 927 928 return super()._parse_alias(this=this, explicit=explicit) 929 930 def _parse_expression(self) -> t.Optional[exp.Expression]: 931 this = super()._parse_expression() 932 933 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 934 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 935 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 936 self._match(TokenType.R_PAREN) 937 938 return this 939 940 def _parse_columns(self) -> exp.Expression: 941 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 942 943 while self._next and self._match_text_seq(")", "APPLY", "("): 944 self._match(TokenType.R_PAREN) 945 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 946 return this 947 948 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 949 value = super()._parse_value(values=values) 950 if not value: 951 return None 952 953 # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast 954 # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one. 955 # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics), 956 # but the final result is not altered by the extra parentheses. 957 # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression 958 expressions = value.expressions 959 if values and not isinstance(expressions[-1], exp.Tuple): 960 value.set( 961 "expressions", 962 [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions], 963 ) 964 965 return value 966 967 class Generator(generator.Generator): 968 QUERY_HINTS = False 969 STRUCT_DELIMITER = ("(", ")") 970 NVL2_SUPPORTED = False 971 TABLESAMPLE_REQUIRES_PARENS = False 972 TABLESAMPLE_SIZE_IS_ROWS = False 973 TABLESAMPLE_KEYWORDS = "SAMPLE" 974 LAST_DAY_SUPPORTS_DATE_PART = False 975 CAN_IMPLEMENT_ARRAY_ANY = True 976 SUPPORTS_TO_NUMBER = False 977 JOIN_HINTS = False 978 TABLE_HINTS = False 979 GROUPINGS_SEP = "" 980 SET_OP_MODIFIERS = False 981 ARRAY_SIZE_NAME = "LENGTH" 982 WRAP_DERIVED_VALUES = False 983 984 STRING_TYPE_MAPPING = { 985 exp.DataType.Type.BLOB: "String", 986 exp.DataType.Type.CHAR: "String", 987 exp.DataType.Type.LONGBLOB: "String", 988 exp.DataType.Type.LONGTEXT: "String", 989 exp.DataType.Type.MEDIUMBLOB: "String", 990 exp.DataType.Type.MEDIUMTEXT: "String", 991 exp.DataType.Type.TINYBLOB: "String", 992 exp.DataType.Type.TINYTEXT: "String", 993 exp.DataType.Type.TEXT: "String", 994 exp.DataType.Type.VARBINARY: "String", 995 exp.DataType.Type.VARCHAR: "String", 996 } 997 998 SUPPORTED_JSON_PATH_PARTS = { 999 exp.JSONPathKey, 1000 exp.JSONPathRoot, 1001 exp.JSONPathSubscript, 1002 } 1003 1004 TYPE_MAPPING = { 1005 **generator.Generator.TYPE_MAPPING, 1006 **STRING_TYPE_MAPPING, 1007 exp.DataType.Type.ARRAY: "Array", 1008 exp.DataType.Type.BOOLEAN: "Bool", 1009 exp.DataType.Type.BIGINT: "Int64", 1010 exp.DataType.Type.DATE32: "Date32", 1011 exp.DataType.Type.DATETIME: "DateTime", 1012 exp.DataType.Type.DATETIME2: "DateTime", 1013 exp.DataType.Type.SMALLDATETIME: "DateTime", 1014 exp.DataType.Type.DATETIME64: "DateTime64", 1015 exp.DataType.Type.DECIMAL: "Decimal", 1016 exp.DataType.Type.DECIMAL32: "Decimal32", 1017 exp.DataType.Type.DECIMAL64: "Decimal64", 1018 
exp.DataType.Type.DECIMAL128: "Decimal128", 1019 exp.DataType.Type.DECIMAL256: "Decimal256", 1020 exp.DataType.Type.TIMESTAMP: "DateTime", 1021 exp.DataType.Type.TIMESTAMPNTZ: "DateTime", 1022 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 1023 exp.DataType.Type.DOUBLE: "Float64", 1024 exp.DataType.Type.ENUM: "Enum", 1025 exp.DataType.Type.ENUM8: "Enum8", 1026 exp.DataType.Type.ENUM16: "Enum16", 1027 exp.DataType.Type.FIXEDSTRING: "FixedString", 1028 exp.DataType.Type.FLOAT: "Float32", 1029 exp.DataType.Type.INT: "Int32", 1030 exp.DataType.Type.MEDIUMINT: "Int32", 1031 exp.DataType.Type.INT128: "Int128", 1032 exp.DataType.Type.INT256: "Int256", 1033 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 1034 exp.DataType.Type.MAP: "Map", 1035 exp.DataType.Type.NESTED: "Nested", 1036 exp.DataType.Type.NOTHING: "Nothing", 1037 exp.DataType.Type.SMALLINT: "Int16", 1038 exp.DataType.Type.STRUCT: "Tuple", 1039 exp.DataType.Type.TINYINT: "Int8", 1040 exp.DataType.Type.UBIGINT: "UInt64", 1041 exp.DataType.Type.UINT: "UInt32", 1042 exp.DataType.Type.UINT128: "UInt128", 1043 exp.DataType.Type.UINT256: "UInt256", 1044 exp.DataType.Type.USMALLINT: "UInt16", 1045 exp.DataType.Type.UTINYINT: "UInt8", 1046 exp.DataType.Type.IPV4: "IPv4", 1047 exp.DataType.Type.IPV6: "IPv6", 1048 exp.DataType.Type.POINT: "Point", 1049 exp.DataType.Type.RING: "Ring", 1050 exp.DataType.Type.LINESTRING: "LineString", 1051 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 1052 exp.DataType.Type.POLYGON: "Polygon", 1053 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 1054 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 1055 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 1056 exp.DataType.Type.DYNAMIC: "Dynamic", 1057 } 1058 1059 TRANSFORMS = { 1060 **generator.Generator.TRANSFORMS, 1061 exp.AnyValue: rename_func("any"), 1062 exp.ApproxDistinct: rename_func("uniq"), 1063 exp.ArrayConcat: rename_func("arrayConcat"), 1064 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 1065 exp.ArraySum: rename_func("arraySum"), 1066 exp.ArgMax: arg_max_or_min_no_count("argMax"), 1067 exp.ArgMin: arg_max_or_min_no_count("argMin"), 1068 exp.Array: inline_array_sql, 1069 exp.CastToStrType: rename_func("CAST"), 1070 exp.CountIf: rename_func("countIf"), 1071 exp.CompressColumnConstraint: lambda self, 1072 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 1073 exp.ComputedColumnConstraint: lambda self, 1074 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 1075 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 1076 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 1077 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 1078 exp.DateStrToDate: rename_func("toDate"), 1079 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 1080 exp.Explode: rename_func("arrayJoin"), 1081 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 1082 exp.IsNan: rename_func("isNaN"), 1083 exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}", 1084 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 1085 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 1086 exp.JSONPathKey: json_path_key_only_name, 1087 exp.JSONPathRoot: lambda *_: "", 1088 exp.Length: length_or_char_length_sql, 1089 exp.Map: _map_sql, 1090 exp.Median: rename_func("median"), 1091 exp.Nullif: rename_func("nullIf"), 1092 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1093 
exp.Pivot: no_pivot_sql, 1094 exp.Quantile: _quantile_sql, 1095 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1096 exp.Rand: rename_func("randCanonical"), 1097 exp.StartsWith: rename_func("startsWith"), 1098 exp.StrPosition: lambda self, e: strposition_sql( 1099 self, 1100 e, 1101 func_name="POSITION", 1102 supports_position=True, 1103 use_ansi_position=False, 1104 ), 1105 exp.TimeToStr: lambda self, e: self.func( 1106 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1107 ), 1108 exp.TimeStrToTime: _timestrtotime_sql, 1109 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1110 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1111 exp.VarMap: _map_sql, 1112 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1113 exp.MD5Digest: rename_func("MD5"), 1114 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1115 exp.SHA: rename_func("SHA1"), 1116 exp.SHA2: sha256_sql, 1117 exp.UnixToTime: _unix_to_time_sql, 1118 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1119 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1120 exp.Variance: rename_func("varSamp"), 1121 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1122 exp.Stddev: rename_func("stddevSamp"), 1123 exp.Chr: rename_func("CHAR"), 1124 exp.Lag: lambda self, e: self.func( 1125 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1126 ), 1127 exp.Lead: lambda self, e: self.func( 1128 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1129 ), 1130 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1131 rename_func("editDistance") 1132 ), 1133 } 1134 1135 PROPERTIES_LOCATION = { 1136 **generator.Generator.PROPERTIES_LOCATION, 1137 exp.OnCluster: exp.Properties.Location.POST_NAME, 1138 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1139 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1140 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1141 } 1142 1143 # There's no list in docs, but it can be found in Clickhouse code 1144 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1145 ON_CLUSTER_TARGETS = { 1146 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1147 "DATABASE", 1148 "TABLE", 1149 "VIEW", 1150 "DICTIONARY", 1151 "INDEX", 1152 "FUNCTION", 1153 "NAMED COLLECTION", 1154 } 1155 1156 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1157 NON_NULLABLE_TYPES = { 1158 exp.DataType.Type.ARRAY, 1159 exp.DataType.Type.MAP, 1160 exp.DataType.Type.STRUCT, 1161 exp.DataType.Type.POINT, 1162 exp.DataType.Type.RING, 1163 exp.DataType.Type.LINESTRING, 1164 exp.DataType.Type.MULTILINESTRING, 1165 exp.DataType.Type.POLYGON, 1166 exp.DataType.Type.MULTIPOLYGON, 1167 } 1168 1169 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1170 strtodate_sql = self.function_fallback_sql(expression) 1171 1172 if not isinstance(expression.parent, exp.Cast): 1173 # StrToDate returns DATEs in other dialects (eg. 
postgres), so 1174 # this branch aims to improve the transpilation to clickhouse 1175 return self.cast_sql(exp.cast(expression, "DATE")) 1176 1177 return strtodate_sql 1178 1179 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1180 this = expression.this 1181 1182 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1183 return self.sql(this) 1184 1185 return super().cast_sql(expression, safe_prefix=safe_prefix) 1186 1187 def trycast_sql(self, expression: exp.TryCast) -> str: 1188 dtype = expression.to 1189 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1190 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1191 dtype.set("nullable", True) 1192 1193 return super().cast_sql(expression) 1194 1195 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1196 this = self.json_path_part(expression.this) 1197 return str(int(this) + 1) if is_int(this) else this 1198 1199 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1200 return f"AS {self.sql(expression, 'this')}" 1201 1202 def _any_to_has( 1203 self, 1204 expression: exp.EQ | exp.NEQ, 1205 default: t.Callable[[t.Any], str], 1206 prefix: str = "", 1207 ) -> str: 1208 if isinstance(expression.left, exp.Any): 1209 arr = expression.left 1210 this = expression.right 1211 elif isinstance(expression.right, exp.Any): 1212 arr = expression.right 1213 this = expression.left 1214 else: 1215 return default(expression) 1216 1217 return prefix + self.func("has", arr.this.unnest(), this) 1218 1219 def eq_sql(self, expression: exp.EQ) -> str: 1220 return self._any_to_has(expression, super().eq_sql) 1221 1222 def neq_sql(self, expression: exp.NEQ) -> str: 1223 return self._any_to_has(expression, super().neq_sql, "NOT ") 1224 1225 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1226 # Manually add a flag to make the search case-insensitive 1227 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1228 return self.func("match", expression.this, regex) 1229 1230 def datatype_sql(self, expression: exp.DataType) -> str: 1231 # String is the standard ClickHouse type, every other variant is just an alias. 1232 # Additionally, any supplied length parameter will be ignored. 1233 # 1234 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1235 if expression.this in self.STRING_TYPE_MAPPING: 1236 dtype = "String" 1237 else: 1238 dtype = super().datatype_sql(expression) 1239 1240 # This section changes the type to `Nullable(...)` if the following conditions hold: 1241 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1242 # and change their semantics 1243 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1244 # constraint: "Type of Map key must be a type, that can be represented by integer or 1245 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1246 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 1247 parent = expression.parent 1248 nullable = expression.args.get("nullable") 1249 if nullable is True or ( 1250 nullable is None 1251 and not ( 1252 isinstance(parent, exp.DataType) 1253 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1254 and expression.index in (None, 0) 1255 ) 1256 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1257 ): 1258 dtype = f"Nullable({dtype})" 1259 1260 return dtype 1261 1262 def cte_sql(self, expression: exp.CTE) -> str: 1263 if expression.args.get("scalar"): 1264 this = self.sql(expression, "this") 1265 alias = self.sql(expression, "alias") 1266 return f"{this} AS {alias}" 1267 1268 return super().cte_sql(expression) 1269 1270 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1271 return super().after_limit_modifiers(expression) + [ 1272 ( 1273 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1274 if expression.args.get("settings") 1275 else "" 1276 ), 1277 ( 1278 self.seg("FORMAT ") + self.sql(expression, "format") 1279 if expression.args.get("format") 1280 else "" 1281 ), 1282 ] 1283 1284 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1285 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1286 1287 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1288 return f"ON CLUSTER {self.sql(expression, 'this')}" 1289 1290 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1291 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1292 exp.Properties.Location.POST_NAME 1293 ): 1294 this_name = self.sql( 1295 expression.this if isinstance(expression.this, exp.Schema) else expression, 1296 "this", 1297 ) 1298 this_properties = " ".join( 1299 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1300 ) 1301 this_schema = self.schema_columns_sql(expression.this) 1302 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1303 1304 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1305 1306 return super().createable_sql(expression, locations) 1307 1308 def create_sql(self, expression: exp.Create) -> str: 1309 # The comment property comes last in CTAS statements, i.e. 
after the query 1310 query = expression.expression 1311 if isinstance(query, exp.Query): 1312 comment_prop = expression.find(exp.SchemaCommentProperty) 1313 if comment_prop: 1314 comment_prop.pop() 1315 query.replace(exp.paren(query)) 1316 else: 1317 comment_prop = None 1318 1319 create_sql = super().create_sql(expression) 1320 1321 comment_sql = self.sql(comment_prop) 1322 comment_sql = f" {comment_sql}" if comment_sql else "" 1323 1324 return f"{create_sql}{comment_sql}" 1325 1326 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1327 this = self.indent(self.sql(expression, "this")) 1328 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1329 1330 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1331 this = self.sql(expression, "this") 1332 this = f" {this}" if this else "" 1333 expr = self.sql(expression, "expression") 1334 expr = f" {expr}" if expr else "" 1335 index_type = self.sql(expression, "index_type") 1336 index_type = f" TYPE {index_type}" if index_type else "" 1337 granularity = self.sql(expression, "granularity") 1338 granularity = f" GRANULARITY {granularity}" if granularity else "" 1339 1340 return f"INDEX{this}{expr}{index_type}{granularity}" 1341 1342 def partition_sql(self, expression: exp.Partition) -> str: 1343 return f"PARTITION {self.expressions(expression, flat=True)}" 1344 1345 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1346 return f"ID {self.sql(expression.this)}" 1347 1348 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1349 return ( 1350 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1351 ) 1352 1353 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1354 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1355 1356 def is_sql(self, expression: exp.Is) -> str: 1357 is_sql = super().is_sql(expression) 1358 1359 if isinstance(expression.parent, exp.Not): 1360 # value IS NOT NULL -> NOT (value IS NULL) 1361 is_sql = self.wrap(is_sql) 1362 1363 return is_sql 1364 1365 def in_sql(self, expression: exp.In) -> str: 1366 in_sql = super().in_sql(expression) 1367 1368 if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"): 1369 in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1) 1370 1371 return in_sql 1372 1373 def not_sql(self, expression: exp.Not) -> str: 1374 if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"): 1375 # let `GLOBAL IN` child interpose `NOT` 1376 return self.sql(expression, "this") 1377 1378 return super().not_sql(expression) 1379 1380 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1381 # If the VALUES clause contains tuples of expressions, we need to treat it 1382 # as a table since Clickhouse will automatically alias it as such. 1383 alias = expression.args.get("alias") 1384 1385 if alias and alias.args.get("columns") and expression.expressions: 1386 values = expression.expressions[0].expressions 1387 values_as_table = any(isinstance(value, exp.Tuple) for value in values) 1388 else: 1389 values_as_table = True 1390 1391 return super().values_sql(expression, values_as_table=values_as_table)
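As a quick illustration of the nullable-type handling implemented above (`_parse_types` marks types as non-nullable by default, and `datatype_sql` wraps nullable ones in `Nullable(...)`), here is a minimal sketch; the exact output may vary between sqlglot versions:

    >>> import sqlglot
    >>> sqlglot.transpile("CAST(x AS TEXT)", read="postgres", write="clickhouse")[0]
    'CAST(x AS Nullable(String))'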
NORMALIZE_FUNCTIONS
Determines how function names are going to be normalized.

Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
NULL_ORDERING
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
LOG_BASE_FIRST
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG).
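For ClickHouse this is None, since its log() takes a single argument; a small check (assuming current sqlglot defaults):

    >>> from sqlglot.dialects.clickhouse import ClickHouse
    >>> ClickHouse.LOG_BASE_FIRST is None  # ClickHouse's log() is unary
    True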
FORCE_EARLY_ALIAS_REF_EXPANSION
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).

For example:

    WITH data AS (
      SELECT
        1 AS id,
        2 AS my_id
    )
    SELECT
      id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
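A sketch of the ClickHouse behaviour using the optimizer's qualify step (hypothetical usage; the exact qualified output depends on the sqlglot version):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = """
    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id FROM data WHERE my_id = 1
    """
    # With early alias-ref expansion, my_id in WHERE is expanded to the
    # aliased expression (id) before columns are qualified.
    print(qualify(sqlglot.parse_one(sql, read="clickhouse"), dialect="clickhouse").sql("clickhouse"))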
PRESERVE_ORIGINAL_NAMES
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated and new functions that share an AST node, e.g. JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery.
NUMBERS_CAN_BE_UNDERSCORE_SEPARATED
Whether number literals can include underscores for better readability.
HEX_STRING_IS_INTEGER_TYPE
Whether hex strings such as x'CC' evaluate to an integer or a binary/blob type.
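ClickHouse treats hex strings as integers; a small check (the attribute name and value are assumed from current sqlglot):

    >>> from sqlglot.dialects.clickhouse import ClickHouse
    >>> ClickHouse.HEX_STRING_IS_INTEGER_TYPE  # e.g. 0xCC is read as the integer 204
    True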
NORMALIZATION_STRATEGY
Specifies the strategy according to which identifiers should be normalized.
ESCAPED_SEQUENCES
Mapping of an escaped sequence (e.g. "\n") to its unescaped version (e.g. an actual newline character).
CREATABLE_KIND_MAPPING
Helper for dialects that use a different name for the same creatable kind. For example, the ClickHouse equivalent of CREATE SCHEMA is CREATE DATABASE.
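A minimal sketch of this mapping in action (output may vary by version):

    >>> import sqlglot
    >>> sqlglot.transpile("CREATE SCHEMA foo", read="postgres", write="clickhouse")[0]
    'CREATE DATABASE foo'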
SET_OP_DISTINCT_BY_DEFAULT
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
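Since ClickHouse requires the modifier to be spelled out, transpiling a bare UNION should make the default explicit; a sketch (exact output assumed from current sqlglot behaviour):

    >>> import sqlglot
    >>> sqlglot.transpile("SELECT 1 UNION SELECT 2", read="postgres", write="clickhouse")[0]
    'SELECT 1 UNION DISTINCT SELECT 2'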
212 def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]: 213 # Clickhouse allows VALUES to have an embedded structure e.g: 214 # VALUES('person String, place String', ('Noah', 'Paris'), ...) 215 # In this case, we don't want to qualify the columns 216 values = expression.expressions[0].expressions 217 218 structure = ( 219 values[0] 220 if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple)) 221 else None 222 ) 223 if structure: 224 # Split each column definition into the column name e.g: 225 # 'person String, place String' -> ['person', 'place'] 226 structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")] 227 column_aliases = [ 228 exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs 229 ] 230 else: 231 # Default column aliases in CH are "c1", "c2", etc. 232 column_aliases = [ 233 exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions)) 234 ] 235 236 return column_aliases
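A sketch of how the embedded structure string drives the aliases (hypothetical snippet; assumes the VALUES expression parses as described in the comments above):

    import sqlglot
    from sqlglot import exp
    from sqlglot.dialects.clickhouse import ClickHouse

    sql = "SELECT * FROM VALUES('person String, place String', ('Noah', 'Paris'))"
    values = sqlglot.parse_one(sql, read="clickhouse").find(exp.Values)

    # The structure string supplies the column names; plain tuples would
    # instead fall back to the default c1, c2, ... aliases.
    print([ident.name for ident in ClickHouse().generate_values_aliases(values)])  # ['person', 'place']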
238 class Tokenizer(tokens.Tokenizer): 239 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 240 IDENTIFIERS = ['"', "`"] 241 IDENTIFIER_ESCAPES = ["\\"] 242 STRING_ESCAPES = ["'", "\\"] 243 BIT_STRINGS = [("0b", "")] 244 HEX_STRINGS = [("0x", ""), ("0X", "")] 245 HEREDOC_STRINGS = ["$"] 246 247 KEYWORDS = { 248 **tokens.Tokenizer.KEYWORDS, 249 ".:": TokenType.DOTCOLON, 250 "ATTACH": TokenType.COMMAND, 251 "DATE32": TokenType.DATE32, 252 "DATETIME64": TokenType.DATETIME64, 253 "DICTIONARY": TokenType.DICTIONARY, 254 "DYNAMIC": TokenType.DYNAMIC, 255 "ENUM8": TokenType.ENUM8, 256 "ENUM16": TokenType.ENUM16, 257 "EXCHANGE": TokenType.COMMAND, 258 "FINAL": TokenType.FINAL, 259 "FIXEDSTRING": TokenType.FIXEDSTRING, 260 "FLOAT32": TokenType.FLOAT, 261 "FLOAT64": TokenType.DOUBLE, 262 "GLOBAL": TokenType.GLOBAL, 263 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 264 "MAP": TokenType.MAP, 265 "NESTED": TokenType.NESTED, 266 "NOTHING": TokenType.NOTHING, 267 "SAMPLE": TokenType.TABLE_SAMPLE, 268 "TUPLE": TokenType.STRUCT, 269 "UINT16": TokenType.USMALLINT, 270 "UINT32": TokenType.UINT, 271 "UINT64": TokenType.UBIGINT, 272 "UINT8": TokenType.UTINYINT, 273 "IPV4": TokenType.IPV4, 274 "IPV6": TokenType.IPV6, 275 "POINT": TokenType.POINT, 276 "RING": TokenType.RING, 277 "LINESTRING": TokenType.LINESTRING, 278 "MULTILINESTRING": TokenType.MULTILINESTRING, 279 "POLYGON": TokenType.POLYGON, 280 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 281 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 282 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 283 "SYSTEM": TokenType.COMMAND, 284 "PREWHERE": TokenType.PREWHERE, 285 } 286 KEYWORDS.pop("/*+") 287 288 SINGLE_TOKENS = { 289 **tokens.Tokenizer.SINGLE_TOKENS, 290 "$": TokenType.HEREDOC_STRING, 291 }
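A small demonstration of the ClickHouse-specific lexing above, where '#' starts a comment and backticks quote identifiers (a sketch; token details may differ by version):

    import sqlglot

    tokens = sqlglot.tokenize("SELECT `col` # trailing comment", read="clickhouse")
    print([token.token_type.name for token in tokens])  # e.g. ['SELECT', 'IDENTIFIER']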
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
293 class Parser(parser.Parser): 294 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 295 # * select x from t1 union all select x from t2 limit 1; 296 # * select x from t1 union all (select x from t2 limit 1); 297 MODIFIERS_ATTACHED_TO_SET_OP = False 298 INTERVAL_SPANS = False 299 OPTIONAL_ALIAS_TOKEN_CTE = False 300 301 FUNCTIONS = { 302 **parser.Parser.FUNCTIONS, 303 "ANY": exp.AnyValue.from_arg_list, 304 "ARRAYSUM": exp.ArraySum.from_arg_list, 305 "COUNTIF": _build_count_if, 306 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 307 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 308 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 309 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 310 "DATE_FORMAT": _build_date_format, 311 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 312 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 313 "FORMATDATETIME": _build_date_format, 314 "JSONEXTRACTSTRING": build_json_extract_path( 315 exp.JSONExtractScalar, zero_based_indexing=False 316 ), 317 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 318 "MAP": parser.build_var_map, 319 "MATCH": exp.RegexpLike.from_arg_list, 320 "RANDCANONICAL": exp.Rand.from_arg_list, 321 "STR_TO_DATE": _build_str_to_date, 322 "TUPLE": exp.Struct.from_arg_list, 323 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 324 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 325 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 326 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 327 "UNIQ": exp.ApproxDistinct.from_arg_list, 328 "XOR": lambda args: exp.Xor(expressions=args), 329 "MD5": exp.MD5Digest.from_arg_list, 330 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 331 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 332 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 333 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 334 } 335 FUNCTIONS.pop("TRANSFORM") 336 337 AGG_FUNCTIONS = { 338 "count", 339 "min", 340 "max", 341 "sum", 342 "avg", 343 "any", 344 "stddevPop", 345 "stddevSamp", 346 "varPop", 347 "varSamp", 348 "corr", 349 "covarPop", 350 "covarSamp", 351 "entropy", 352 "exponentialMovingAverage", 353 "intervalLengthSum", 354 "kolmogorovSmirnovTest", 355 "mannWhitneyUTest", 356 "median", 357 "rankCorr", 358 "sumKahan", 359 "studentTTest", 360 "welchTTest", 361 "anyHeavy", 362 "anyLast", 363 "boundingRatio", 364 "first_value", 365 "last_value", 366 "argMin", 367 "argMax", 368 "avgWeighted", 369 "topK", 370 "topKWeighted", 371 "deltaSum", 372 "deltaSumTimestamp", 373 "groupArray", 374 "groupArrayLast", 375 "groupUniqArray", 376 "groupArrayInsertAt", 377 "groupArrayMovingAvg", 378 "groupArrayMovingSum", 379 "groupArraySample", 380 "groupBitAnd", 381 "groupBitOr", 382 "groupBitXor", 383 "groupBitmap", 384 "groupBitmapAnd", 385 "groupBitmapOr", 386 "groupBitmapXor", 387 "sumWithOverflow", 388 "sumMap", 389 "minMap", 390 "maxMap", 391 "skewSamp", 392 "skewPop", 393 "kurtSamp", 394 "kurtPop", 395 "uniq", 396 "uniqExact", 397 "uniqCombined", 398 "uniqCombined64", 399 "uniqHLL12", 400 "uniqTheta", 401 "quantile", 402 "quantiles", 403 "quantileExact", 404 "quantilesExact", 405 "quantileExactLow", 406 "quantilesExactLow", 407 "quantileExactHigh", 408 "quantilesExactHigh", 409 
"quantileExactWeighted", 410 "quantilesExactWeighted", 411 "quantileTiming", 412 "quantilesTiming", 413 "quantileTimingWeighted", 414 "quantilesTimingWeighted", 415 "quantileDeterministic", 416 "quantilesDeterministic", 417 "quantileTDigest", 418 "quantilesTDigest", 419 "quantileTDigestWeighted", 420 "quantilesTDigestWeighted", 421 "quantileBFloat16", 422 "quantilesBFloat16", 423 "quantileBFloat16Weighted", 424 "quantilesBFloat16Weighted", 425 "simpleLinearRegression", 426 "stochasticLinearRegression", 427 "stochasticLogisticRegression", 428 "categoricalInformationValue", 429 "contingency", 430 "cramersV", 431 "cramersVBiasCorrected", 432 "theilsU", 433 "maxIntersections", 434 "maxIntersectionsPosition", 435 "meanZTest", 436 "quantileInterpolatedWeighted", 437 "quantilesInterpolatedWeighted", 438 "quantileGK", 439 "quantilesGK", 440 "sparkBar", 441 "sumCount", 442 "largestTriangleThreeBuckets", 443 "histogram", 444 "sequenceMatch", 445 "sequenceCount", 446 "windowFunnel", 447 "retention", 448 "uniqUpTo", 449 "sequenceNextNode", 450 "exponentialTimeDecayedAvg", 451 } 452 453 AGG_FUNCTIONS_SUFFIXES = [ 454 "If", 455 "Array", 456 "ArrayIf", 457 "Map", 458 "SimpleState", 459 "State", 460 "Merge", 461 "MergeState", 462 "ForEach", 463 "Distinct", 464 "OrDefault", 465 "OrNull", 466 "Resample", 467 "ArgMin", 468 "ArgMax", 469 ] 470 471 FUNC_TOKENS = { 472 *parser.Parser.FUNC_TOKENS, 473 TokenType.AND, 474 TokenType.OR, 475 TokenType.SET, 476 } 477 478 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 479 480 ID_VAR_TOKENS = { 481 *parser.Parser.ID_VAR_TOKENS, 482 TokenType.LIKE, 483 } 484 485 AGG_FUNC_MAPPING = ( 486 lambda functions, suffixes: { 487 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 488 } 489 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 490 491 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 492 493 FUNCTION_PARSERS = { 494 **parser.Parser.FUNCTION_PARSERS, 495 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 496 "QUANTILE": lambda self: self._parse_quantile(), 497 "MEDIAN": lambda self: self._parse_quantile(), 498 "COLUMNS": lambda self: self._parse_columns(), 499 } 500 501 FUNCTION_PARSERS.pop("MATCH") 502 503 PROPERTY_PARSERS = { 504 **parser.Parser.PROPERTY_PARSERS, 505 "ENGINE": lambda self: self._parse_engine_property(), 506 } 507 PROPERTY_PARSERS.pop("DYNAMIC") 508 509 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 510 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 511 512 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 513 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 514 515 RANGE_PARSERS = { 516 **parser.Parser.RANGE_PARSERS, 517 TokenType.GLOBAL: lambda self, this: self._parse_global_in(this), 518 } 519 520 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 521 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
522 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 523 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 524 525 JOIN_KINDS = { 526 *parser.Parser.JOIN_KINDS, 527 TokenType.ANY, 528 TokenType.ASOF, 529 TokenType.ARRAY, 530 } 531 532 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 533 TokenType.ANY, 534 TokenType.ARRAY, 535 TokenType.FINAL, 536 TokenType.FORMAT, 537 TokenType.SETTINGS, 538 } 539 540 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 541 TokenType.FORMAT, 542 } 543 544 LOG_DEFAULTS_TO_LN = True 545 546 QUERY_MODIFIER_PARSERS = { 547 **parser.Parser.QUERY_MODIFIER_PARSERS, 548 TokenType.SETTINGS: lambda self: ( 549 "settings", 550 self._advance() or self._parse_csv(self._parse_assignment), 551 ), 552 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 553 } 554 555 CONSTRAINT_PARSERS = { 556 **parser.Parser.CONSTRAINT_PARSERS, 557 "INDEX": lambda self: self._parse_index_constraint(), 558 "CODEC": lambda self: self._parse_compress(), 559 } 560 561 ALTER_PARSERS = { 562 **parser.Parser.ALTER_PARSERS, 563 "REPLACE": lambda self: self._parse_alter_table_replace(), 564 } 565 566 SCHEMA_UNNAMED_CONSTRAINTS = { 567 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 568 "INDEX", 569 } 570 571 PLACEHOLDER_PARSERS = { 572 **parser.Parser.PLACEHOLDER_PARSERS, 573 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 574 } 575 576 def _parse_engine_property(self) -> exp.EngineProperty: 577 self._match(TokenType.EQ) 578 return self.expression( 579 exp.EngineProperty, 580 this=self._parse_field(any_token=True, anonymous_func=True), 581 ) 582 583 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 584 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 585 return self._parse_lambda() 586 587 def _parse_types( 588 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 589 ) -> t.Optional[exp.Expression]: 590 dtype = super()._parse_types( 591 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 592 ) 593 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 594 # Mark every type as non-nullable which is ClickHouse's default, unless it's 595 # already marked as nullable. This marker helps us transpile types from other 596 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 597 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 598 # fail in ClickHouse without the `Nullable` type constructor. 599 dtype.set("nullable", False) 600 601 return dtype 602 603 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 604 index = self._index 605 this = self._parse_bitwise() 606 if self._match(TokenType.FROM): 607 self._retreat(index) 608 return super()._parse_extract() 609 610 # We return Anonymous here because extract and regexpExtract have different semantics, 611 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 612 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 613 # 614 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
615 self._match(TokenType.COMMA) 616 return self.expression( 617 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 618 ) 619 620 def _parse_assignment(self) -> t.Optional[exp.Expression]: 621 this = super()._parse_assignment() 622 623 if self._match(TokenType.PLACEHOLDER): 624 return self.expression( 625 exp.If, 626 this=this, 627 true=self._parse_assignment(), 628 false=self._match(TokenType.COLON) and self._parse_assignment(), 629 ) 630 631 return this 632 633 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 634 """ 635 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 636 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 637 """ 638 index = self._index 639 640 this = self._parse_id_var() 641 self._match(TokenType.COLON) 642 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 643 self._match_text_seq("IDENTIFIER") and "Identifier" 644 ) 645 646 if not kind: 647 self._retreat(index) 648 return None 649 elif not self._match(TokenType.R_BRACE): 650 self.raise_error("Expecting }") 651 652 if isinstance(this, exp.Identifier) and not this.quoted: 653 this = exp.var(this.name) 654 655 return self.expression(exp.Placeholder, this=this, kind=kind) 656 657 def _parse_bracket( 658 self, this: t.Optional[exp.Expression] = None 659 ) -> t.Optional[exp.Expression]: 660 l_brace = self._match(TokenType.L_BRACE, advance=False) 661 bracket = super()._parse_bracket(this) 662 663 if l_brace and isinstance(bracket, exp.Struct): 664 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 665 for expression in bracket.expressions: 666 if not isinstance(expression, exp.PropertyEQ): 667 break 668 669 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 670 varmap.args["values"].append("expressions", expression.expression) 671 672 return varmap 673 674 return bracket 675 676 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 677 this = super()._parse_in(this) 678 this.set("is_global", is_global) 679 return this 680 681 def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In: 682 is_negated = self._match(TokenType.NOT) 683 this = self._match(TokenType.IN) and self._parse_in(this, is_global=True) 684 return self.expression(exp.Not, this=this) if is_negated else this 685 686 def _parse_table( 687 self, 688 schema: bool = False, 689 joins: bool = False, 690 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 691 parse_bracket: bool = False, 692 is_db_reference: bool = False, 693 parse_partition: bool = False, 694 ) -> t.Optional[exp.Expression]: 695 this = super()._parse_table( 696 schema=schema, 697 joins=joins, 698 alias_tokens=alias_tokens, 699 parse_bracket=parse_bracket, 700 is_db_reference=is_db_reference, 701 ) 702 703 if isinstance(this, exp.Table): 704 inner = this.this 705 alias = this.args.get("alias") 706 707 if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns: 708 alias.set("columns", [exp.to_identifier("generate_series")]) 709 710 if self._match(TokenType.FINAL): 711 this = self.expression(exp.Final, this=this) 712 713 return this 714 715 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 716 return super()._parse_position(haystack_first=True) 717 718 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 719 def _parse_cte(self) -> t.Optional[exp.CTE]: 720 # WITH <identifier> AS <subquery expression> 721 cte: 
t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 722 723 if not cte: 724 # WITH <expression> AS <identifier> 725 cte = self.expression( 726 exp.CTE, 727 this=self._parse_assignment(), 728 alias=self._parse_table_alias(), 729 scalar=True, 730 ) 731 732 return cte 733 734 def _parse_join_parts( 735 self, 736 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 737 is_global = self._match(TokenType.GLOBAL) and self._prev 738 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 739 740 if kind_pre: 741 kind = self._match_set(self.JOIN_KINDS) and self._prev 742 side = self._match_set(self.JOIN_SIDES) and self._prev 743 return is_global, side, kind 744 745 return ( 746 is_global, 747 self._match_set(self.JOIN_SIDES) and self._prev, 748 self._match_set(self.JOIN_KINDS) and self._prev, 749 ) 750 751 def _parse_join( 752 self, skip_join_token: bool = False, parse_bracket: bool = False 753 ) -> t.Optional[exp.Join]: 754 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 755 if join: 756 join.set("global", join.args.pop("method", None)) 757 758 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 759 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 760 if join.kind == "ARRAY": 761 for table in join.find_all(exp.Table): 762 table.replace(table.to_column()) 763 764 return join 765 766 def _parse_function( 767 self, 768 functions: t.Optional[t.Dict[str, t.Callable]] = None, 769 anonymous: bool = False, 770 optional_parens: bool = True, 771 any_token: bool = False, 772 ) -> t.Optional[exp.Expression]: 773 expr = super()._parse_function( 774 functions=functions, 775 anonymous=anonymous, 776 optional_parens=optional_parens, 777 any_token=any_token, 778 ) 779 780 func = expr.this if isinstance(expr, exp.Window) else expr 781 782 # Aggregate functions can be split in 2 parts: <func_name><suffix> 783 parts = ( 784 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 785 ) 786 787 if parts: 788 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 789 params = self._parse_func_params(anon_func) 790 791 kwargs = { 792 "this": anon_func.this, 793 "expressions": anon_func.expressions, 794 } 795 if parts[1]: 796 exp_class: t.Type[exp.Expression] = ( 797 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 798 ) 799 else: 800 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 801 802 kwargs["exp_class"] = exp_class 803 if params: 804 kwargs["params"] = params 805 806 func = self.expression(**kwargs) 807 808 if isinstance(expr, exp.Window): 809 # The window's func was parsed as Anonymous in base parser, fix its 810 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 811 expr.set("this", func) 812 elif params: 813 # Params have blocked super()._parse_function() from parsing the following window 814 # (if that exists) as they're standing between the function call and the window spec 815 expr = self._parse_window(func) 816 else: 817 expr = func 818 819 return expr 820 821 def _parse_func_params( 822 self, this: t.Optional[exp.Func] = None 823 ) -> t.Optional[t.List[exp.Expression]]: 824 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 825 return self._parse_csv(self._parse_lambda) 826 827 if self._match(TokenType.L_PAREN): 828 params = self._parse_csv(self._parse_lambda) 829 self._match_r_paren(this) 830 return params 831 832 return None 833 834 def _parse_quantile(self) -> exp.Quantile: 835 this = 
self._parse_lambda() 836 params = self._parse_func_params() 837 if params: 838 return self.expression(exp.Quantile, this=params[0], quantile=this) 839 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 840 841 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 842 return super()._parse_wrapped_id_vars(optional=True) 843 844 def _parse_primary_key( 845 self, wrapped_optional: bool = False, in_props: bool = False 846 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 847 return super()._parse_primary_key( 848 wrapped_optional=wrapped_optional or in_props, in_props=in_props 849 ) 850 851 def _parse_on_property(self) -> t.Optional[exp.Expression]: 852 index = self._index 853 if self._match_text_seq("CLUSTER"): 854 this = self._parse_string() or self._parse_id_var() 855 if this: 856 return self.expression(exp.OnCluster, this=this) 857 else: 858 self._retreat(index) 859 return None 860 861 def _parse_index_constraint( 862 self, kind: t.Optional[str] = None 863 ) -> exp.IndexColumnConstraint: 864 # INDEX name1 expr TYPE type1(args) GRANULARITY value 865 this = self._parse_id_var() 866 expression = self._parse_assignment() 867 868 index_type = self._match_text_seq("TYPE") and ( 869 self._parse_function() or self._parse_var() 870 ) 871 872 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 873 874 return self.expression( 875 exp.IndexColumnConstraint, 876 this=this, 877 expression=expression, 878 index_type=index_type, 879 granularity=granularity, 880 ) 881 882 def _parse_partition(self) -> t.Optional[exp.Partition]: 883 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 884 if not self._match(TokenType.PARTITION): 885 return None 886 887 if self._match_text_seq("ID"): 888 # Corresponds to the PARTITION ID <string_value> syntax 889 expressions: t.List[exp.Expression] = [ 890 self.expression(exp.PartitionId, this=self._parse_string()) 891 ] 892 else: 893 expressions = self._parse_expressions() 894 895 return self.expression(exp.Partition, expressions=expressions) 896 897 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 898 partition = self._parse_partition() 899 900 if not partition or not self._match(TokenType.FROM): 901 return None 902 903 return self.expression( 904 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 905 ) 906 907 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 908 if not self._match_text_seq("PROJECTION"): 909 return None 910 911 return self.expression( 912 exp.ProjectionDef, 913 this=self._parse_id_var(), 914 expression=self._parse_wrapped(self._parse_statement), 915 ) 916 917 def _parse_constraint(self) -> t.Optional[exp.Expression]: 918 return super()._parse_constraint() or self._parse_projection_def() 919 920 def _parse_alias( 921 self, this: t.Optional[exp.Expression], explicit: bool = False 922 ) -> t.Optional[exp.Expression]: 923 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 924 # so "APPLY" shouldn't be parsed as <expr>'s alias. 
However, "SELECT <expr> apply" is a valid alias 925 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 926 return this 927 928 return super()._parse_alias(this=this, explicit=explicit) 929 930 def _parse_expression(self) -> t.Optional[exp.Expression]: 931 this = super()._parse_expression() 932 933 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 934 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 935 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 936 self._match(TokenType.R_PAREN) 937 938 return this 939 940 def _parse_columns(self) -> exp.Expression: 941 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 942 943 while self._next and self._match_text_seq(")", "APPLY", "("): 944 self._match(TokenType.R_PAREN) 945 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 946 return this 947 948 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 949 value = super()._parse_value(values=values) 950 if not value: 951 return None 952 953 # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast 954 # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one. 955 # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics), 956 # but the final result is not altered by the extra parentheses. 957 # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression 958 expressions = value.expressions 959 if values and not isinstance(expressions[-1], exp.Tuple): 960 value.set( 961 "expressions", 962 [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions], 963 ) 964 965 return value
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
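A usage sketch (illustrative, not part of the generated docs): these options are normally passed through sqlglot's top-level helpers, which forward them to the dialect's Parser:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # error_level and max_errors are forwarded to ClickHouse's Parser
    sqlglot.parse_one("SELECT 1 +", read="clickhouse", error_level=ErrorLevel.RAISE)
except ParseError as e:
    print(e.errors)  # structured error details, including the captured context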
class Generator(generator.Generator):
    QUERY_HINTS = False
    STRUCT_DELIMITER = ("(", ")")
    NVL2_SUPPORTED = False
    TABLESAMPLE_REQUIRES_PARENS = False
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_KEYWORDS = "SAMPLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    GROUPINGS_SEP = ""
    SET_OP_MODIFIERS = False
    ARRAY_SIZE_NAME = "LENGTH"
    WRAP_DERIVED_VALUES = False

    STRING_TYPE_MAPPING = {
        exp.DataType.Type.BLOB: "String",
        exp.DataType.Type.CHAR: "String",
        exp.DataType.Type.LONGBLOB: "String",
        exp.DataType.Type.LONGTEXT: "String",
        exp.DataType.Type.MEDIUMBLOB: "String",
        exp.DataType.Type.MEDIUMTEXT: "String",
        exp.DataType.Type.TINYBLOB: "String",
        exp.DataType.Type.TINYTEXT: "String",
        exp.DataType.Type.TEXT: "String",
        exp.DataType.Type.VARBINARY: "String",
        exp.DataType.Type.VARCHAR: "String",
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **STRING_TYPE_MAPPING,
        exp.DataType.Type.ARRAY: "Array",
        exp.DataType.Type.BOOLEAN: "Bool",
        exp.DataType.Type.BIGINT: "Int64",
        exp.DataType.Type.DATE32: "Date32",
        exp.DataType.Type.DATETIME: "DateTime",
        exp.DataType.Type.DATETIME2: "DateTime",
        exp.DataType.Type.SMALLDATETIME: "DateTime",
        exp.DataType.Type.DATETIME64: "DateTime64",
        exp.DataType.Type.DECIMAL: "Decimal",
        exp.DataType.Type.DECIMAL32: "Decimal32",
        exp.DataType.Type.DECIMAL64: "Decimal64",
        exp.DataType.Type.DECIMAL128: "Decimal128",
        exp.DataType.Type.DECIMAL256: "Decimal256",
        exp.DataType.Type.TIMESTAMP: "DateTime",
        exp.DataType.Type.TIMESTAMPNTZ: "DateTime",
        exp.DataType.Type.TIMESTAMPTZ: "DateTime",
        exp.DataType.Type.DOUBLE: "Float64",
        exp.DataType.Type.ENUM: "Enum",
        exp.DataType.Type.ENUM8: "Enum8",
        exp.DataType.Type.ENUM16: "Enum16",
        exp.DataType.Type.FIXEDSTRING: "FixedString",
        exp.DataType.Type.FLOAT: "Float32",
        exp.DataType.Type.INT: "Int32",
        exp.DataType.Type.MEDIUMINT: "Int32",
        exp.DataType.Type.INT128: "Int128",
        exp.DataType.Type.INT256: "Int256",
        exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
        exp.DataType.Type.MAP: "Map",
        exp.DataType.Type.NESTED: "Nested",
        exp.DataType.Type.NOTHING: "Nothing",
        exp.DataType.Type.SMALLINT: "Int16",
        exp.DataType.Type.STRUCT: "Tuple",
        exp.DataType.Type.TINYINT: "Int8",
        exp.DataType.Type.UBIGINT: "UInt64",
        exp.DataType.Type.UINT: "UInt32",
        exp.DataType.Type.UINT128: "UInt128",
        exp.DataType.Type.UINT256: "UInt256",
        exp.DataType.Type.USMALLINT: "UInt16",
        exp.DataType.Type.UTINYINT: "UInt8",
        exp.DataType.Type.IPV4: "IPv4",
        exp.DataType.Type.IPV6: "IPv6",
        exp.DataType.Type.POINT: "Point",
        exp.DataType.Type.RING: "Ring",
        exp.DataType.Type.LINESTRING: "LineString",
        exp.DataType.Type.MULTILINESTRING: "MultiLineString",
        exp.DataType.Type.POLYGON: "Polygon",
        exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
        exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
        exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        exp.DataType.Type.DYNAMIC: "Dynamic",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("any"),
        exp.ApproxDistinct: rename_func("uniq"),
        exp.ArrayConcat: rename_func("arrayConcat"),
        exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
        exp.ArraySum: rename_func("arraySum"),
        exp.ArgMax: arg_max_or_min_no_count("argMax"),
        exp.ArgMin: arg_max_or_min_no_count("argMin"),
        exp.Array: inline_array_sql,
        exp.CastToStrType: rename_func("CAST"),
        exp.CountIf: rename_func("countIf"),
        exp.CompressColumnConstraint: lambda self,
        e: f"CODEC({self.expressions(e, key='this', flat=True)})",
        exp.ComputedColumnConstraint: lambda self,
        e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
        exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
        exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
        exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
        exp.DateStrToDate: rename_func("toDate"),
        exp.DateSub: _datetime_delta_sql("DATE_SUB"),
        exp.Explode: rename_func("arrayJoin"),
        exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        exp.IsNan: rename_func("isNaN"),
        exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
        exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.Length: length_or_char_length_sql,
        exp.Map: _map_sql,
        exp.Median: rename_func("median"),
        exp.Nullif: rename_func("nullIf"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
        exp.Rand: rename_func("randCanonical"),
        exp.StartsWith: rename_func("startsWith"),
        exp.StrPosition: lambda self, e: strposition_sql(
            self,
            e,
            func_name="POSITION",
            supports_position=True,
            use_ansi_position=False,
        ),
        exp.TimeToStr: lambda self, e: self.func(
            "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
        ),
        exp.TimeStrToTime: _timestrtotime_sql,
        exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
        exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
        exp.VarMap: _map_sql,
        exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
        exp.MD5Digest: rename_func("MD5"),
        exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.UnixToTime: _unix_to_time_sql,
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
        exp.Variance: rename_func("varSamp"),
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Stddev: rename_func("stddevSamp"),
        exp.Chr: rename_func("CHAR"),
        exp.Lag: lambda self, e: self.func(
            "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Lead: lambda self, e: self.func(
            "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("editDistance")
        ),
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.OnCluster: exp.Properties.Location.POST_NAME,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.ToTableProperty: exp.Properties.Location.POST_NAME,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # There's no list in docs, but it can be found in Clickhouse code
    # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
    ON_CLUSTER_TARGETS = {
        "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
        "DATABASE",
        "TABLE",
        "VIEW",
        "DICTIONARY",
        "INDEX",
        "FUNCTION",
        "NAMED COLLECTION",
    }

    # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
    NON_NULLABLE_TYPES = {
        exp.DataType.Type.ARRAY,
        exp.DataType.Type.MAP,
        exp.DataType.Type.STRUCT,
        exp.DataType.Type.POINT,
        exp.DataType.Type.RING,
        exp.DataType.Type.LINESTRING,
        exp.DataType.Type.MULTILINESTRING,
        exp.DataType.Type.POLYGON,
        exp.DataType.Type.MULTIPOLYGON,
    }

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        strtodate_sql = self.function_fallback_sql(expression)

        if not isinstance(expression.parent, exp.Cast):
            # StrToDate returns DATEs in other dialects (eg. postgres), so
            # this branch aims to improve the transpilation to clickhouse
            return self.cast_sql(exp.cast(expression, "DATE"))

        return strtodate_sql

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
            return self.sql(this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        dtype = expression.to
        if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
            # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
            dtype.set("nullable", True)

        return super().cast_sql(expression)

    def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)

    def eq_sql(self, expression: exp.EQ) -> str:
        return self._any_to_has(expression, super().eq_sql)

    def neq_sql(self, expression: exp.NEQ) -> str:
        return self._any_to_has(expression, super().neq_sql, "NOT ")

    def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
        # Manually add a flag to make the search case-insensitive
        regex = self.func("CONCAT", "'(?i)'", expression.expression)
        return self.func("match", expression.this, regex)

    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            dtype = "String"
        else:
            dtype = super().datatype_sql(expression)

        # This section changes the type to `Nullable(...)` if the following conditions hold:
        # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
        #   and change their semantics
        # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
        #   constraint: "Type of Map key must be a type, that can be represented by integer or
        #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
        # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
        parent = expression.parent
        nullable = expression.args.get("nullable")
        if nullable is True or (
            nullable is None
            and not (
                isinstance(parent, exp.DataType)
                and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                and expression.index in (None, 0)
            )
            and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
        ):
            dtype = f"Nullable({dtype})"

        return dtype

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(
                expression.this if isinstance(expression.this, exp.Schema) else expression,
                "this",
            )
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{this_properties}{this_schema}"

        return super().createable_sql(expression, locations)

    def create_sql(self, expression: exp.Create) -> str:
        # The comment property comes last in CTAS statements, i.e. after the query
        query = expression.expression
        if isinstance(query, exp.Query):
            comment_prop = expression.find(exp.SchemaCommentProperty)
            if comment_prop:
                comment_prop.pop()
                query.replace(exp.paren(query))
        else:
            comment_prop = None

        create_sql = super().create_sql(expression)

        comment_sql = self.sql(comment_prop)
        comment_sql = f" {comment_sql}" if comment_sql else ""

        return f"{create_sql}{comment_sql}"

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

    def is_sql(self, expression: exp.Is) -> str:
        is_sql = super().is_sql(expression)

        if isinstance(expression.parent, exp.Not):
            # value IS NOT NULL -> NOT (value IS NULL)
            is_sql = self.wrap(is_sql)

        return is_sql

    def in_sql(self, expression: exp.In) -> str:
        in_sql = super().in_sql(expression)

        if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"):
            in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

        return in_sql

    def not_sql(self, expression: exp.Not) -> str:
        if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"):
            # let `GLOBAL IN` child interpose `NOT`
            return self.sql(expression, "this")

        return super().not_sql(expression)

    def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
        # If the VALUES clause contains tuples of expressions, we need to treat it
        # as a table since Clickhouse will automatically alias it as such.
        alias = expression.args.get("alias")

        if alias and alias.args.get("columns") and expression.expressions:
            values = expression.expressions[0].expressions
            values_as_table = any(isinstance(value, exp.Tuple) for value in values)
        else:
            values_as_table = True

        return super().values_sql(expression, values_as_table=values_as_table)
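A short, hedged sketch of how a few of these hooks surface when generating ClickHouse SQL (outputs shown in comments are approximate and may differ slightly between sqlglot versions):

import sqlglot

# TRANSFORMS renames, e.g. exp.ApproxDistinct -> uniq:
print(sqlglot.transpile("SELECT APPROX_COUNT_DISTINCT(a) FROM t", write="clickhouse")[0])
# expected along the lines of: SELECT uniq(a) FROM t

# STRING_TYPE_MAPPING plus datatype_sql: VARCHAR collapses to String (the length
# is dropped) and, not being explicitly non-nullable, is wrapped in Nullable(...):
print(sqlglot.transpile("SELECT CAST(x AS VARCHAR(10))", write="clickhouse")[0])
# expected along the lines of: SELECT CAST(x AS Nullable(String))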
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
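For reference, an illustrative sketch (not part of the generated docs) of passing these options through sqlglot's top-level API:

import sqlglot

print(
    sqlglot.transpile(
        "SELECT a, b FROM t WHERE a > 1",
        read="clickhouse",
        write="clickhouse",
        pretty=True,  # formatted output; pad and indent control the indentation sizes
    )[0]
)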